Search in sources :

Example 6 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TestDataCenterClient method testGetTableStatistics.

/**
 * Checks the statistics that {@code DataCenterClient.getTableStatistics} returns for
 * {@code tpch.tiny.orders}: the table row count plus, per column, the distinct value
 * count, the nulls fraction and either the value range or the data size.
 */
@Test
public void testGetTableStatistics() {
    Map<String, ColumnHandle> columnHandles = new LinkedHashMap<>();
    DataCenterClient client = new DataCenterClient(this.config, httpClient, typeManager);
    columnHandles.put("orderkey", new DataCenterColumnHandle("orderkey", DOUBLE, 0));
    columnHandles.put("custkey", new DataCenterColumnHandle("custkey", DOUBLE, 1));
    columnHandles.put("orderstatus", new DataCenterColumnHandle("orderstatus", createVarcharType(1), 2));
    columnHandles.put("totalprice", new DataCenterColumnHandle("totalprice", DOUBLE, 3));
    columnHandles.put("orderdate", new DataCenterColumnHandle("orderdate", DATE, 4));
    columnHandles.put("orderpriority", new DataCenterColumnHandle("orderpriority", createVarcharType(15), 5));
    columnHandles.put("clerk", new DataCenterColumnHandle("clerk", createUnboundedVarcharType(), 6));
    columnHandles.put("shippriority", new DataCenterColumnHandle("shippriority", DOUBLE, 7));
    columnHandles.put("comment", new DataCenterColumnHandle("comment", createVarcharType(79), 8));

    TableStatistics tableStatistics = client.getTableStatistics("tpch.tiny.orders", columnHandles);
    assertEquals(tableStatistics.getRowCount().getValue(), 15000.0);

    // Only columns present in the returned statistics are checked; each name selects its expectations.
    Map<ColumnHandle, ColumnStatistics> statisticsByColumn = tableStatistics.getColumnStatistics();
    for (Map.Entry<ColumnHandle, ColumnStatistics> statsEntry : statisticsByColumn.entrySet()) {
        ColumnStatistics stats = statsEntry.getValue();
        switch (statsEntry.getKey().getColumnName()) {
            case "orderkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 15000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), (double) 1);
                assertEquals(stats.getRange().get().getMax(), (double) 60000);
                break;
            case "custkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), (double) 1);
                assertEquals(stats.getRange().get().getMax(), (double) 1499);
                break;
            case "orderstatus":
                assertEquals(stats.getDataSize().getValue(), 3.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 3.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "totalprice":
                assertEquals(stats.getDistinctValuesCount().getValue(), 14996.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 874.89);
                assertEquals(stats.getRange().get().getMax(), 466001.28);
                break;
            case "orderdate":
                assertEquals(stats.getDistinctValuesCount().getValue(), 2401.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), (double) 8035);
                assertEquals(stats.getRange().get().getMax(), (double) 10440);
                break;
            case "orderpriority":
                assertEquals(stats.getDataSize().getValue(), 42.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 5.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "clerk":
                assertEquals(stats.getDataSize().getValue(), 15000.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "shippriority":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), (double) 0);
                assertEquals(stats.getRange().get().getMax(), (double) 0);
                break;
            case "comment":
                assertEquals(stats.getDataSize().getValue(), 727249.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 14995.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            default:
                // Columns without expectations are not checked.
                break;
        }
    }
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) TableStatistics(io.prestosql.spi.statistics.TableStatistics) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) DataCenterClient(io.hetu.core.plugin.datacenter.client.DataCenterClient) Test(org.testng.annotations.Test)

Example 7 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class DataCenterClient method getTableStatistics.

/**
 * Get remote table statistics.
 *
 * <p>Runs {@code SHOW STATS FOR <table>} against the remote data center and converts
 * each result row into column statistics. Rows whose column name is null, or whose
 * column name is not a key of {@code columnHandles}, contribute no column statistics.
 * The table row count is read from the last row of the result.
 *
 * @param tableFullName the fully qualified table name
 * @param columnHandles data center column handles
 * @return the table statistics
 * @throws PrestoTransportException if the statistics query fails with a {@link SQLException}
 */
public TableStatistics getTableStatistics(String tableFullName, Map<String, ColumnHandle> columnHandles) {
    String query = "SHOW STATS FOR " + tableFullName;
    Iterable<List<Object>> data;
    try {
        data = getResults(clientSession, query);
    } catch (SQLException ex) {
        // Pass the SQLException along as the cause so the underlying failure is not lost.
        throw new PrestoTransportException(REMOTE_TASK_ERROR, HostAddress.fromUri(this.serverUri.uri()), "could not connect to the remote data center", ex);
    }
    TableStatistics.Builder builder = TableStatistics.builder();
    List<Object> lastRow = null;
    for (List<Object> row : data) {
        // Track every row, including skipped ones: the row count is taken from the final row.
        lastRow = row;
        if (row.get(0) == null) {
            // Only the last row can have the first column (column name) null
            continue;
        }
        // row[0] is column_name
        DataCenterColumnHandle columnHandle = (DataCenterColumnHandle) columnHandles.get(row.get(0).toString());
        if (columnHandle == null) {
            // Unknown column found
            continue;
        }
        // Create the builder only for rows that actually produce column statistics.
        ColumnStatistics.Builder columnStatisticBuilder = new ColumnStatistics.Builder();
        // row[1] is data_size
        if (row.get(1) != null) {
            columnStatisticBuilder.setDataSize(Estimate.of(Double.parseDouble(row.get(1).toString())));
        }
        // row[2] is distinct_values_count
        if (row.get(2) != null) {
            columnStatisticBuilder.setDistinctValuesCount(Estimate.of(Double.parseDouble(row.get(2).toString())));
        }
        // row[3] is nulls_fraction
        if (row.get(3) != null) {
            columnStatisticBuilder.setNullsFraction(Estimate.of(Double.parseDouble(row.get(3).toString())));
        }
        // row[5] is low_value and row[6] is high_value; a range is set only when both are present
        if (row.get(5) != null && row.get(6) != null) {
            String minStr = row.get(5).toString();
            String maxStr = row.get(6).toString();
            Type columnType = columnHandle.getColumnType();
            if (columnType.equals(DATE)) {
                // DATE bounds arrive as formatted dates and are stored as epoch-day numbers.
                LocalDate minDate = LocalDate.parse(minStr, DATE_FORMATTER);
                LocalDate maxDate = LocalDate.parse(maxStr, DATE_FORMATTER);
                columnStatisticBuilder.setRange(new DoubleRange(minDate.toEpochDay(), maxDate.toEpochDay()));
            } else {
                columnStatisticBuilder.setRange(new DoubleRange(Double.parseDouble(minStr), Double.parseDouble(maxStr)));
            }
        }
        builder.setColumnStatistics(columnHandle, columnStatisticBuilder.build());
    }
    // Get row_count (row[4]) from the last row
    if (lastRow != null && lastRow.get(4) != null) {
        builder.setRowCount(Estimate.of(Double.parseDouble(lastRow.get(4).toString())));
    }
    return builder.build();
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) SQLException(java.sql.SQLException) LocalDate(java.time.LocalDate) DoubleRange(io.prestosql.spi.statistics.DoubleRange) Type(io.prestosql.spi.type.Type) TypeUtil.parseType(io.prestosql.client.util.TypeUtil.parseType) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) TableStatistics(io.prestosql.spi.statistics.TableStatistics) PrestoTransportException(io.prestosql.spi.PrestoTransportException) DataCenterColumnHandle(io.hetu.core.plugin.datacenter.DataCenterColumnHandle)

Example 8 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class AbstractTestHive method assertTableStatsComputed.

/**
 * Asserts that statistics have been computed for the given table.
 *
 * <p>The row count must be known, and the set of columns that carry statistics must equal
 * {@code expectedColumnStatsColumns}. For each such column the nulls fraction and distinct
 * values count must be known; the data size must be known for varchar columns and unknown
 * for all other column types.
 *
 * @param tableName the table whose statistics are checked
 * @param expectedColumnStatsColumns names of the columns expected to have column statistics
 */
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns) {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue(), true);
        assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");
        // Re-key the statistics by column name so they can be compared with the expected names.
        Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream().collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
        assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        columnsStatistics.forEach((columnName, columnStatistics) -> {
            ColumnHandle columnHandle = columnHandles.get(columnName);
            Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();
            assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
            assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);
            if (isVarcharType(columnType)) {
                assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            } else {
                // This assertion fails when the data size IS known, so the message says that.
                assertTrue(columnStatistics.getDataSize().isUnknown(), "data size should be unknown for " + columnName);
            }
        });
    }
}
Also used : HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ROLLBACK_AFTER_FINISH_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) HiveType.toHiveType(io.prestosql.plugin.hive.HiveType.toHiveType) TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) FileStatus(org.apache.hadoop.fs.FileStatus) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) TypeSignature.parseTypeSignature(io.prestosql.spi.type.TypeSignature.parseTypeSignature) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) RowType(io.prestosql.spi.type.RowType) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Table(io.prestosql.plugin.hive.metastore.Table) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) HiveBasicStatistics.createZeroStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics) 
TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) MetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.MetastoreLocator) LocalDateTime(java.time.LocalDateTime) PRESTO_QUERY_ID_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ThriftHiveMetastoreConfig(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastoreConfig) OptionalLong(java.util.OptionalLong) REGULAR(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) PARTITION_KEY(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) ThriftHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastore) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) AfterClass(org.testng.annotations.AfterClass) HiveTestUtils.mapType(io.prestosql.plugin.hive.HiveTestUtils.mapType) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ROLLBACK_RIGHT_AWAY(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) HostAndPort(com.google.common.net.HostAndPort) USER(io.prestosql.spi.security.PrincipalType.USER) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) VARBINARY(io.prestosql.spi.type.VarbinaryType.VARBINARY) 
HiveTestUtils.getDefaultOrcFileWriterFactory(io.prestosql.plugin.hive.HiveTestUtils.getDefaultOrcFileWriterFactory) ConnectorPageSourceProvider(io.prestosql.spi.connector.ConnectorPageSourceProvider) ROLLBACK_AFTER_APPEND_PAGE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) ConnectorSplitManager(io.prestosql.spi.connector.ConnectorSplitManager) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) MaterializedResult.materializeSourceDataStream(io.prestosql.testing.MaterializedResult.materializeSourceDataStream) MaterializedResult(io.prestosql.testing.MaterializedResult) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) RcFilePageSource(io.prestosql.plugin.hive.rcfile.RcFilePageSource) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) DecimalType.createDecimalType(io.prestosql.spi.type.DecimalType.createDecimalType) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics.createEmptyStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createEmptyStatistics) ImmutableSet(com.google.common.collect.ImmutableSet) CachingHiveMetastore(io.prestosql.plugin.hive.metastore.CachingHiveMetastore) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) ROLLBACK_AFTER_DELETE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) 
Assert.assertNotNull(org.testng.Assert.assertNotNull) HYPER_LOG_LOG(io.prestosql.spi.type.HyperLogLogType.HYPER_LOG_LOG) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) SqlTimestamp(io.prestosql.spi.type.SqlTimestamp) BUCKET_COLUMN_NAME(io.prestosql.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) Assert.assertNull(org.testng.Assert.assertNull) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) SqlDate(io.prestosql.spi.type.SqlDate) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) BUCKETED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) ViewColumn(io.prestosql.spi.connector.ConnectorViewDefinition.ViewColumn) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) DATE(io.prestosql.spi.type.DateType.DATE) Math.toIntExact(java.lang.Math.toIntExact) STORAGE_FORMAT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) Block(io.prestosql.spi.block.Block) ExecutorService(java.util.concurrent.ExecutorService) Collections.emptyMap(java.util.Collections.emptyMap) ParquetPageSource(io.prestosql.plugin.hive.parquet.ParquetPageSource) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) Page(io.prestosql.spi.Page) 
HiveTestUtils.getDefaultHiveDataStreamFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) Hashing.sha256(com.google.common.hash.Hashing.sha256) BUCKETING_V1(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V1) Assertions.assertEqualsIgnoreOrder(io.airlift.testing.Assertions.assertEqualsIgnoreOrder) PARTITIONED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) JoinCompiler(io.prestosql.sql.gen.JoinCompiler) Assert.assertTrue(org.testng.Assert.assertTrue) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) OrcConcatPageSource(io.prestosql.plugin.hive.orc.OrcConcatPageSource) ROLLBACK_AFTER_BEGIN_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) Arrays(java.util.Arrays) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) NoHdfsAuthentication(io.prestosql.plugin.hive.authentication.NoHdfsAuthentication) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) ValueSet(io.prestosql.spi.predicate.ValueSet) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) BigDecimal(java.math.BigDecimal) Sets.difference(com.google.common.collect.Sets.difference) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) RowFieldName(io.prestosql.spi.type.RowFieldName) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSinkProvider(io.prestosql.spi.connector.ConnectorPageSinkProvider) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) HIVE_LONG(io.prestosql.plugin.hive.HiveType.HIVE_LONG) 
ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) UNGROUPED_SCHEDULING(io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SqlStandardAccessControlMetadata(io.prestosql.plugin.hive.security.SqlStandardAccessControlMetadata) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) VarcharType.createVarcharType(io.prestosql.spi.type.VarcharType.createVarcharType) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Domain(io.prestosql.spi.predicate.Domain) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) TestingNodeManager(io.prestosql.testing.TestingNodeManager) Lists.reverse(com.google.common.collect.Lists.reverse) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) StandardTypes(io.prestosql.spi.type.StandardTypes) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) BUCKET_COUNT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) MapType(io.prestosql.spi.type.MapType) GroupByHashPageIndexerFactory(io.prestosql.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) HiveTestUtils.rowType(io.prestosql.plugin.hive.HiveTestUtils.rowType) RecordCursor(io.prestosql.spi.connector.RecordCursor) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SESSION(io.prestosql.plugin.hive.HiveTestUtils.SESSION) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) LongStream(java.util.stream.LongStream) 
MULTIDELIMIT(io.prestosql.plugin.hive.HiveStorageFormat.MULTIDELIMIT) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.prestosql.plugin.hive.HiveTestUtils.PAGE_SORTER) UTC(org.joda.time.DateTimeZone.UTC) MaterializedRow(io.prestosql.testing.MaterializedRow) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) DateTimeTestingUtils.sqlTimestampOf(io.prestosql.testing.DateTimeTestingUtils.sqlTimestampOf) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.prestosql.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) TableType(org.apache.hadoop.hive.metastore.TableType) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) OrcPageSource(io.prestosql.plugin.hive.orc.OrcPageSource) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) Iterables.concat(com.google.common.collect.Iterables.concat) HiveWriteUtils.createDirectory(io.prestosql.plugin.hive.HiveWriteUtils.createDirectory) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) Constraint(io.prestosql.spi.connector.Constraint) ImmutableMap(com.google.common.collect.ImmutableMap) ArrayType(io.prestosql.spi.type.ArrayType) CharType.createCharType(io.prestosql.spi.type.CharType.createCharType) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) 
HiveTestUtils.getDefaultHiveFileWriterFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) PRESTO_VERSION_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.getTypes(io.prestosql.plugin.hive.HiveTestUtils.getTypes) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) SORTED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) Logger(io.airlift.log.Logger) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) AtomicReference(java.util.concurrent.atomic.AtomicReference) SqlVarbinary(io.prestosql.spi.type.SqlVarbinary) BridgingHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.BridgingHiveMetastore) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) COMMIT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) TestingMetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.TestingMetastoreLocator) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Range(io.prestosql.spi.predicate.Range) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) HivePrivilegeInfo(io.prestosql.plugin.hive.metastore.HivePrivilegeInfo) Objects.requireNonNull(java.util.Objects.requireNonNull) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) REAL(io.prestosql.spi.type.RealType.REAL) 
HiveMetadata.convertToPredicate(io.prestosql.plugin.hive.HiveMetadata.convertToPredicate) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) HiveTestUtils.getNoOpIndexCache(io.prestosql.plugin.hive.HiveTestUtils.getNoOpIndexCache) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TRANSACTION_CONFLICT(io.prestosql.spi.StandardErrorCode.TRANSACTION_CONFLICT) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) HiveTestUtils.getDefaultHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) HiveTestUtils.arrayType(io.prestosql.plugin.hive.HiveTestUtils.arrayType) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ROLLBACK_AFTER_SINK_FINISH(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) HiveType.toHiveType(io.prestosql.plugin.hive.HiveType.toHiveType) RowType(io.prestosql.spi.type.RowType) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) HiveTestUtils.mapType(io.prestosql.plugin.hive.HiveTestUtils.mapType) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) Type(io.prestosql.spi.type.Type) DecimalType.createDecimalType(io.prestosql.spi.type.DecimalType.createDecimalType) VarcharType.createVarcharType(io.prestosql.spi.type.VarcharType.createVarcharType) MapType(io.prestosql.spi.type.MapType) HiveTestUtils.rowType(io.prestosql.plugin.hive.HiveTestUtils.rowType) TableType(org.apache.hadoop.hive.metastore.TableType) ArrayType(io.prestosql.spi.type.ArrayType) 
CharType.createCharType(io.prestosql.spi.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.arrayType(io.prestosql.plugin.hive.HiveTestUtils.arrayType) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) TableStatistics(io.prestosql.spi.statistics.TableStatistics) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Map(java.util.Map) Collections.emptyMap(java.util.Collections.emptyMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 9 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class AbstractTestHive method testPartitionStatisticsSampling.

/**
 * Checks that table statistics do not depend on the partition sample size.
 *
 * <p>Creates a temporary partitioned table, stores the same {@code statistics} on the
 * partitions {@code ds=2016-01-01} and {@code ds=2016-01-02}, then asserts that the table
 * statistics obtained with a sample size of 1 equal those obtained with a sample size of 2.
 * The temporary table is dropped afterwards, even on failure.
 *
 * @param columns the columns passed to {@code createDummyPartitionedTable}
 * @param statistics the statistics written to both partitions
 * @throws Exception if any of the helper calls fails
 */
protected void testPartitionStatisticsSampling(List<ColumnMetadata> columns, PartitionStatistics statistics) throws Exception {
    SchemaTableName tableName = temporaryTable("test_partition_statistics_sampling");
    try {
        createDummyPartitionedTable(tableName, columns);
        HiveMetastore metastore = getMetastoreClient();
        HiveIdentity identity = new HiveIdentity(SESSION);
        // Both partitions receive identical statistics, whatever they held before.
        for (String partitionName : new String[] {"ds=2016-01-01", "ds=2016-01-02"}) {
            metastore.updatePartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), partitionName, previous -> statistics);
        }
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, tableName);
            TableStatistics statisticsFromTwoPartitions = metadata.getTableStatistics(sampleSize(2), tableHandle, Constraint.alwaysTrue(), true);
            TableStatistics statisticsFromOnePartition = metadata.getTableStatistics(sampleSize(1), tableHandle, Constraint.alwaysTrue(), true);
            assertEquals(statisticsFromOnePartition, statisticsFromTwoPartitions);
        }
    } finally {
        dropTable(tableName);
    }
}
Also used : ThriftHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastore) CachingHiveMetastore(io.prestosql.plugin.hive.metastore.CachingHiveMetastore) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) BridgingHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.BridgingHiveMetastore) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) TableStatistics(io.prestosql.spi.statistics.TableStatistics) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle)

Example 10 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class MetastoreHiveStatisticsProvider method getTableStatistics.

/**
 * Builds table statistics from the statistics of a sample of the table's partitions.
 *
 * <p>Returns empty statistics when statistics are disabled for the session, and zero
 * statistics when {@code partitions} is empty. A cached partition sample is reused only
 * when column statistics are not requested and the cached partition count equals the
 * current one; otherwise a new sample is drawn and cached. Without column statistics the
 * result carries only the row count (when an average is available), file count and on-disk
 * size.
 *
 * @param session the session supplying the statistics settings
 * @param schemaTableName the name of the table
 * @param columns column handles by column name
 * @param columnTypes column types by column name
 * @param partitions the partitions of the table to consider
 * @param includeColumnStatistics whether per-column statistics are computed
 * @param table the metastore table, also used as the sample cache key
 * @return the computed statistics, or empty statistics when corrupted column statistics
 *         are encountered and the session is configured to ignore them
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, SchemaTableName schemaTableName, Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, boolean includeColumnStatistics, Table table) {
    if (!isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    if (partitions.isEmpty()) {
        return createZeroStatistics(columns, columnTypes);
    }
    int sampleSize = getPartitionStatisticsSampleSize(session);
    SamplePartition cachedSample = samplePartitionCache.get(table);
    boolean reuseCachedSample = !includeColumnStatistics && cachedSample != null && cachedSample.partitionCount == partitions.size();
    List<HivePartition> partitionsSample;
    if (reuseCachedSample) {
        partitionsSample = cachedSample.partitionsSample;
    } else {
        partitionsSample = getPartitionsSample(partitions, sampleSize);
        samplePartitionCache.put(table, new SamplePartition(partitions.size(), partitionsSample));
    }
    try {
        Map<String, PartitionStatistics> statisticsSample = statisticsProvider.getPartitionsStatistics(session, schemaTableName, partitionsSample, table);
        if (includeColumnStatistics) {
            validatePartitionStatistics(schemaTableName, statisticsSample);
            return getTableStatistics(columns, columnTypes, partitions, statisticsSample);
        }
        // Summary only: scale the average rows per sampled partition up to all partitions.
        OptionalDouble averageRows = calculateAverageRowsPerPartition(statisticsSample.values());
        TableStatistics.Builder summary = TableStatistics.builder();
        averageRows.ifPresent(average -> summary.setRowCount(Estimate.of(average * partitions.size())));
        summary.setFileCount(calulateFileCount(statisticsSample.values()));
        summary.setOnDiskDataSizeInBytes(calculateTotalOnDiskSizeInBytes(statisticsSample.values()));
        return summary.build();
    } catch (PrestoException e) {
        boolean corruptedStatistics = e.getErrorCode().equals(HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
        if (!(corruptedStatistics && isIgnoreCorruptedStatistics(session))) {
            throw e;
        }
        log.error(e);
        return TableStatistics.empty();
    }
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TableStatistics(io.prestosql.spi.statistics.TableStatistics) PrestoException(io.prestosql.spi.PrestoException) OptionalDouble(java.util.OptionalDouble) HivePartition(io.prestosql.plugin.hive.HivePartition)

Aggregations

TableStatistics (io.prestosql.spi.statistics.TableStatistics)35 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)18 Type (io.prestosql.spi.type.Type)14 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)13 Map (java.util.Map)13 ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle)12 ColumnStatistics (io.prestosql.spi.statistics.ColumnStatistics)12 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)10 Constraint (io.prestosql.spi.connector.Constraint)10 Test (org.testng.annotations.Test)10 PrestoException (io.prestosql.spi.PrestoException)9 ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata)9 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)9 DoubleRange (io.prestosql.spi.statistics.DoubleRange)9 HashMap (java.util.HashMap)9 List (java.util.List)9 Optional (java.util.Optional)9 ImmutableMap (com.google.common.collect.ImmutableMap)8 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)8 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)8