use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.
the class TestHiveGlueMetastore method testStatisticsLargeNumberOfColumns.
@Test
public void testStatisticsLargeNumberOfColumns() throws Exception {
SchemaTableName tableName = temporaryTable("test_statistics_large_number_of_columns");
try {
ImmutableList.Builder<ColumnMetadata> columns = ImmutableList.builder();
ImmutableMap.Builder<String, HiveColumnStatistics> columnStatistics = ImmutableMap.builder();
for (int i = 1; i < 1500; ++i) {
String columnName = "t_bigint " + i + "_" + String.join("", Collections.nCopies(240, "x"));
columns.add(new ColumnMetadata(columnName, BIGINT));
columnStatistics.put(columnName, createIntegerColumnStatistics(OptionalLong.of(-1000 - i), OptionalLong.of(1000 + i), OptionalLong.of(i), OptionalLong.of(2 * i)));
}
PartitionStatistics partitionStatistics = PartitionStatistics.builder().setBasicStatistics(HIVE_BASIC_STATISTICS).setColumnStatistics(columnStatistics.buildOrThrow()).build();
doCreateEmptyTable(tableName, ORC, columns.build());
testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, partitionStatistics);
} finally {
dropTable(tableName);
}
}
use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.
the class TestCreateTableTask method testCreateWithNotNullColumns.
@Test
public void testCreateWithNotNullColumns() {
metadata.setConnectorCapabilities(NOT_NULL_COLUMN_CONSTRAINT);
List<TableElement> inputColumns = ImmutableList.of(new ColumnDefinition(identifier("a"), toSqlType(DATE), true, emptyList(), Optional.empty()), new ColumnDefinition(identifier("b"), toSqlType(VARCHAR), false, emptyList(), Optional.empty()), new ColumnDefinition(identifier("c"), toSqlType(VARBINARY), false, emptyList(), Optional.empty()));
CreateTable statement = new CreateTable(QualifiedName.of("test_table"), inputColumns, true, ImmutableList.of(), Optional.empty());
CreateTableTask createTableTask = new CreateTableTask(plannerContext, new AllowAllAccessControl(), columnPropertyManager, tablePropertyManager);
getFutureValue(createTableTask.internalExecute(statement, testSession, emptyList(), output -> {
}));
assertEquals(metadata.getCreateTableCallCount(), 1);
List<ColumnMetadata> columns = metadata.getReceivedTableMetadata().get(0).getColumns();
assertEquals(columns.size(), 3);
assertEquals(columns.get(0).getName(), "a");
assertEquals(columns.get(0).getType().getDisplayName().toUpperCase(ENGLISH), "DATE");
assertTrue(columns.get(0).isNullable());
assertEquals(columns.get(1).getName(), "b");
assertEquals(columns.get(1).getType().getDisplayName().toUpperCase(ENGLISH), "VARCHAR");
assertFalse(columns.get(1).isNullable());
assertEquals(columns.get(2).getName(), "c");
assertEquals(columns.get(2).getType().getDisplayName().toUpperCase(ENGLISH), "VARBINARY");
assertFalse(columns.get(2).isNullable());
}
use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.
the class TestDefaultJdbcMetadata method testCreateAndAlterTable.
@Test
public void testCreateAndAlterTable() {
SchemaTableName table = new SchemaTableName("example", "foo");
metadata.createTable(SESSION, new ConnectorTableMetadata(table, ImmutableList.of(new ColumnMetadata("text", VARCHAR))), false);
JdbcTableHandle handle = metadata.getTableHandle(SESSION, table);
ConnectorTableMetadata layout = metadata.getTableMetadata(SESSION, handle);
assertEquals(layout.getTable(), table);
assertEquals(layout.getColumns().size(), 1);
assertEquals(layout.getColumns().get(0), new ColumnMetadata("text", VARCHAR));
metadata.addColumn(SESSION, handle, new ColumnMetadata("x", VARCHAR));
layout = metadata.getTableMetadata(SESSION, handle);
assertEquals(layout.getColumns().size(), 2);
assertEquals(layout.getColumns().get(0), new ColumnMetadata("text", VARCHAR));
assertEquals(layout.getColumns().get(1), new ColumnMetadata("x", VARCHAR));
JdbcColumnHandle columnHandle = new JdbcColumnHandle("x", JDBC_VARCHAR, VARCHAR);
metadata.dropColumn(SESSION, handle, columnHandle);
layout = metadata.getTableMetadata(SESSION, handle);
assertEquals(layout.getColumns().size(), 1);
assertEquals(layout.getColumns().get(0), new ColumnMetadata("text", VARCHAR));
SchemaTableName newTableName = new SchemaTableName("example", "bar");
metadata.renameTable(SESSION, handle, newTableName);
handle = metadata.getTableHandle(SESSION, newTableName);
layout = metadata.getTableMetadata(SESSION, handle);
assertEquals(layout.getTable(), newTableName);
assertEquals(layout.getColumns().size(), 1);
assertEquals(layout.getColumns().get(0), new ColumnMetadata("text", VARCHAR));
}
use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.
the class TestDefaultJdbcMetadata method testAggregationPushdownForTableHandle.
@Test
public void testAggregationPushdownForTableHandle() {
ConnectorSession session = TestingConnectorSession.builder().setPropertyMetadata(new JdbcMetadataSessionProperties(new JdbcMetadataConfig().setAggregationPushdownEnabled(true), Optional.empty()).getSessionProperties()).build();
ColumnHandle groupByColumn = metadata.getColumnHandles(session, tableHandle).get("text");
Function<ConnectorTableHandle, Optional<AggregationApplicationResult<ConnectorTableHandle>>> applyAggregation = handle -> metadata.applyAggregation(session, handle, ImmutableList.of(new AggregateFunction("count", BIGINT, List.of(), List.of(), false, Optional.empty())), ImmutableMap.of(), ImmutableList.of(ImmutableList.of(groupByColumn)));
ConnectorTableHandle baseTableHandle = metadata.getTableHandle(session, new SchemaTableName("example", "numbers"));
Optional<AggregationApplicationResult<ConnectorTableHandle>> aggregationResult = applyAggregation.apply(baseTableHandle);
assertThat(aggregationResult).isPresent();
SchemaTableName noAggregationPushdownTable = new SchemaTableName("example", "no_aggregation_pushdown");
metadata.createTable(SESSION, new ConnectorTableMetadata(noAggregationPushdownTable, ImmutableList.of(new ColumnMetadata("text", VARCHAR))), false);
ConnectorTableHandle noAggregationPushdownTableHandle = metadata.getTableHandle(session, noAggregationPushdownTable);
aggregationResult = applyAggregation.apply(noAggregationPushdownTableHandle);
assertThat(aggregationResult).isEmpty();
}
use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.
the class HiveMetastoreBackedDeltaLakeMetastore method getTableStatistics.
@Override
public TableStatistics getTableStatistics(ConnectorSession session, DeltaLakeTableHandle tableHandle, Constraint constraint) {
TableSnapshot tableSnapshot = getSnapshot(tableHandle.getSchemaTableName(), session);
double numRecords = 0L;
MetadataEntry metadata = transactionLogAccess.getMetadataEntry(tableSnapshot, session).orElseThrow(() -> new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Metadata not found in transaction log for " + tableHandle.getTableName()));
List<ColumnMetadata> columnMetadata = DeltaLakeSchemaSupport.extractSchema(metadata, typeManager);
List<DeltaLakeColumnHandle> columns = columnMetadata.stream().map(columnMeta -> new DeltaLakeColumnHandle(columnMeta.getName(), columnMeta.getType(), metadata.getCanonicalPartitionColumns().contains(columnMeta.getName()) ? PARTITION_KEY : REGULAR)).collect(toImmutableList());
Map<DeltaLakeColumnHandle, Double> nullCounts = new HashMap<>();
columns.forEach(column -> nullCounts.put(column, 0.0));
Map<DeltaLakeColumnHandle, Double> minValues = new HashMap<>();
Map<DeltaLakeColumnHandle, Double> maxValues = new HashMap<>();
Map<DeltaLakeColumnHandle, Set<String>> partitioningColumnsDistinctValues = new HashMap<>();
columns.stream().filter(column -> column.getColumnType() == PARTITION_KEY).forEach(column -> partitioningColumnsDistinctValues.put(column, new HashSet<>()));
if (tableHandle.getEnforcedPartitionConstraint().isNone() || tableHandle.getNonPartitionConstraint().isNone() || constraint.getSummary().isNone()) {
return createZeroStatistics(columns);
}
Set<String> predicatedColumnNames = tableHandle.getNonPartitionConstraint().getDomains().orElseThrow().keySet().stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableSet());
List<ColumnMetadata> predicatedColumns = columnMetadata.stream().filter(column -> predicatedColumnNames.contains(column.getName())).collect(toImmutableList());
for (AddFileEntry addEntry : transactionLogAccess.getActiveFiles(tableSnapshot, session)) {
Optional<? extends DeltaLakeFileStatistics> fileStatistics = addEntry.getStats();
if (fileStatistics.isEmpty()) {
// Open source Delta Lake does not collect stats
return TableStatistics.empty();
}
DeltaLakeFileStatistics stats = fileStatistics.get();
if (!partitionMatchesPredicate(addEntry.getCanonicalPartitionValues(), tableHandle.getEnforcedPartitionConstraint().getDomains().orElseThrow())) {
continue;
}
TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(addEntry, predicatedColumns, tableHandle.getMetadataEntry().getCanonicalPartitionColumns());
if (!tableHandle.getNonPartitionConstraint().overlaps(statisticsPredicate)) {
continue;
}
if (stats.getNumRecords().isEmpty()) {
// Not clear if it's possible for stats to be present with no row count, but bail out if that happens
return TableStatistics.empty();
}
numRecords += stats.getNumRecords().get();
for (DeltaLakeColumnHandle column : columns) {
if (column.getColumnType() == PARTITION_KEY) {
Optional<String> partitionValue = addEntry.getCanonicalPartitionValues().get(column.getName());
if (partitionValue.isEmpty()) {
nullCounts.merge(column, (double) stats.getNumRecords().get(), Double::sum);
} else {
// NULL is not counted as a distinct value
// Code below assumes that values returned by addEntry.getCanonicalPartitionValues() are normalized,
// it may not be true in case of real, doubles, timestamps etc
partitioningColumnsDistinctValues.get(column).add(partitionValue.get());
}
} else {
Optional<Long> maybeNullCount = stats.getNullCount(column.getName());
if (maybeNullCount.isPresent()) {
nullCounts.put(column, nullCounts.get(column) + maybeNullCount.get());
} else {
// If any individual file fails to report null counts, fail to calculate the total for the table
nullCounts.put(column, NaN);
}
}
// Math.min returns NaN if any operand is NaN
stats.getMinColumnValue(column).map(parsedValue -> toStatsRepresentation(column.getType(), parsedValue)).filter(OptionalDouble::isPresent).map(OptionalDouble::getAsDouble).ifPresent(parsedValueAsDouble -> minValues.merge(column, parsedValueAsDouble, Math::min));
stats.getMaxColumnValue(column).map(parsedValue -> toStatsRepresentation(column.getType(), parsedValue)).filter(OptionalDouble::isPresent).map(OptionalDouble::getAsDouble).ifPresent(parsedValueAsDouble -> maxValues.merge(column, parsedValueAsDouble, Math::max));
}
}
if (numRecords == 0) {
return createZeroStatistics(columns);
}
TableStatistics.Builder statsBuilder = new TableStatistics.Builder().setRowCount(Estimate.of(numRecords));
Optional<DeltaLakeStatistics> statistics = Optional.empty();
if (isExtendedStatisticsEnabled(session)) {
statistics = statisticsAccess.readDeltaLakeStatistics(session, tableHandle.getLocation());
}
for (DeltaLakeColumnHandle column : columns) {
ColumnStatistics.Builder columnStatsBuilder = new ColumnStatistics.Builder();
Double nullCount = nullCounts.get(column);
columnStatsBuilder.setNullsFraction(nullCount.isNaN() ? Estimate.unknown() : Estimate.of(nullCount / numRecords));
Double maxValue = maxValues.get(column);
Double minValue = minValues.get(column);
if (isValidInRange(maxValue) && isValidInRange(minValue)) {
columnStatsBuilder.setRange(new DoubleRange(minValue, maxValue));
} else if (isValidInRange(maxValue)) {
columnStatsBuilder.setRange(new DoubleRange(NEGATIVE_INFINITY, maxValue));
} else if (isValidInRange(minValue)) {
columnStatsBuilder.setRange(new DoubleRange(minValue, POSITIVE_INFINITY));
}
// extend statistics with NDV
if (column.getColumnType() == PARTITION_KEY) {
columnStatsBuilder.setDistinctValuesCount(Estimate.of(partitioningColumnsDistinctValues.get(column).size()));
}
if (statistics.isPresent()) {
DeltaLakeColumnStatistics deltaLakeColumnStatistics = statistics.get().getColumnStatistics().get(column.getName());
if (deltaLakeColumnStatistics != null && column.getColumnType() != PARTITION_KEY) {
columnStatsBuilder.setDistinctValuesCount(Estimate.of(deltaLakeColumnStatistics.getNdvSummary().cardinality()));
}
}
statsBuilder.setColumnStatistics(column, columnStatsBuilder.build());
}
return statsBuilder.build();
}
Aggregations