Example 16 with PartitionStatistics

Use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

In class MetastoreHiveStatisticsProvider, method getPartitionRowCount:

private static OptionalDouble getPartitionRowCount(String partitionName, Map<String, PartitionStatistics> statistics) {
    PartitionStatistics partitionStatistics = statistics.get(partitionName);
    if (partitionStatistics == null) {
        return OptionalDouble.empty();
    }
    OptionalLong rowCount = partitionStatistics.getBasicStatistics().getRowCount();
    if (rowCount.isPresent()) {
        verify(rowCount.getAsLong() >= 0, "rowCount must be greater than or equal to zero");
        return OptionalDouble.of(rowCount.getAsLong());
    }
    return OptionalDouble.empty();
}
Also used : PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong)
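
getPartitionRowCount returns OptionalDouble.empty() when a partition has no statistics instead of treating it as zero rows, which lets callers distinguish "unknown" from "empty". The sketch below is illustrative only, not Trino's implementation: it uses a plain Map<String, OptionalLong> as a simplified stand-in for the PartitionStatistics lookup, and the class name PartitionRowCountSketch and the averageKnownRowCount helper are hypothetical. It shows one way such per-partition lookups can feed an average-rows-per-partition estimate that skips partitions with unknown counts.

import java.util.Map;
import java.util.OptionalDouble;
import java.util.OptionalLong;

public class PartitionRowCountSketch {
    // Simplified stand-in for looking up PartitionStatistics and reading its basic row count.
    static OptionalDouble getPartitionRowCount(String partitionName, Map<String, OptionalLong> rowCounts) {
        OptionalLong rowCount = rowCounts.getOrDefault(partitionName, OptionalLong.empty());
        // Unknown stays unknown: empty() means "no statistics", not "zero rows".
        return rowCount.isPresent() ? OptionalDouble.of(rowCount.getAsLong()) : OptionalDouble.empty();
    }

    // Hypothetical helper: average only over partitions whose row count is known.
    static OptionalDouble averageKnownRowCount(Iterable<String> partitionNames, Map<String, OptionalLong> rowCounts) {
        long known = 0;
        double total = 0;
        for (String name : partitionNames) {
            OptionalDouble rows = getPartitionRowCount(name, rowCounts);
            if (rows.isPresent()) {
                known++;
                total += rows.getAsDouble();
            }
        }
        return known == 0 ? OptionalDouble.empty() : OptionalDouble.of(total / known);
    }

    public static void main(String[] args) {
        Map<String, OptionalLong> rowCounts = Map.of(
                "ds=2024-01-01", OptionalLong.of(1000),
                "ds=2024-01-02", OptionalLong.of(3000),
                "ds=2024-01-03", OptionalLong.empty());
        // Prints OptionalDouble[2000.0]: the partition without statistics is excluded, not counted as zero.
        System.out.println(averageKnownRowCount(rowCounts.keySet(), rowCounts));
    }
}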

Example 17 with PartitionStatistics

Use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned:

@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, ColumnStatistics.builder()
                    .setRange(new DoubleRange(-100, 100))
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(300))
                    .build())
            .build();
    assertEquals(
            statisticsProvider.getTableStatistics(SESSION, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))),
            expected);
}
Also used : DoubleRange(io.trino.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.trino.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HivePartition(io.trino.plugin.hive.HivePartition) Test(org.testng.annotations.Test)
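
The expected TableStatistics follows directly from the Hive statistics in the fixture: the basic statistics supply a row count of 1000, and the integer column statistics (min -100, max 100, 500 nulls, 300 distinct values) translate into a range of [-100, 100], a nulls fraction of 500 / 1000 = 0.5, and a distinct-values count of 300. A minimal, self-contained recap of that arithmetic; the class name is hypothetical and it uses no Trino types:

public class ColumnStatisticsArithmetic {
    public static void main(String[] args) {
        long rowCount = 1000;           // from HiveBasicStatistics in the fixture
        long nullsCount = 500;          // from the integer column statistics
        long distinctValuesCount = 300;
        long min = -100;
        long max = 100;

        double nullsFraction = (double) nullsCount / rowCount;
        // Prints: range=[-100, 100], nullsFraction=0.5, ndv=300
        System.out.printf("range=[%d, %d], nullsFraction=%.1f, ndv=%d%n", min, max, nullsFraction, distinctValuesCount);
    }
}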

Example 18 with PartitionStatistics

Use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

In class TestMetastoreHiveStatisticsProvider, method testCreateDataColumnStatistics:

@Test
public void testCreateDataColumnStatistics() {
    assertEquals(createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of()), ColumnStatistics.empty());
    assertEquals(createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of(PartitionStatistics.empty(), PartitionStatistics.empty())), ColumnStatistics.empty());
    assertEquals(createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of(new PartitionStatistics(HiveBasicStatistics.createZeroStatistics(), ImmutableMap.of("column2", HiveColumnStatistics.empty())))), ColumnStatistics.empty());
}
Also used : MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.trino.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) Test(org.testng.annotations.Test)
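
All three cases yield ColumnStatistics.empty() because none of the supplied partition statistics contain usable statistics for COLUMN: the list is empty, the statistics objects are empty, or they only cover a different column ("column2"). A hypothetical sketch of that filtering step, independent of Trino's actual createDataColumnStatistics (class and method names here are illustrative):

import java.util.List;
import java.util.Map;
import java.util.Optional;

public class ColumnStatisticsFilterSketch {
    // Collect the per-partition statistics entries that actually cover the requested column.
    static <T> List<T> usableStatistics(String column, List<Map<String, T>> perPartitionColumnStats) {
        return perPartitionColumnStats.stream()
                .map(stats -> Optional.ofNullable(stats.get(column)))
                .flatMap(Optional::stream)
                .toList();
    }

    public static void main(String[] args) {
        // Only statistics for "column2" are available, so nothing usable remains for "column".
        List<Map<String, String>> stats = List.of(Map.of("column2", "stats-for-column2"));
        System.out.println(usableStatistics("column", stats));  // []
    }
}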

Example 19 with PartitionStatistics

Use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

In class SemiTransactionalHiveMetastore, method updatePartitionStatistics:

// For HiveBasicStatistics, we only overwrite the original statistics if the new one is not empty.
// For HiveColumnStatistics, only overwrite the original statistics for columns present in the new ones and preserve the others.
private static PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
    HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
    HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(
            firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()),
            firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()),
            firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()),
            firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
    Map<String, HiveColumnStatistics> updatedColumnStatistics = updateColumnStatistics(oldPartitionStats.getColumnStatistics(), newPartitionStats.getColumnStatistics());
    return new PartitionStatistics(updatedBasicStatistics, updatedColumnStatistics);
}
Also used : PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics)
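
The firstPresent and updateColumnStatistics helpers are referenced but not shown in this excerpt. Below is a minimal sketch of the merge rule the comment describes, with OptionalLong-valued fields and a generic per-column map; the helper names mirror the excerpt, but the bodies are illustrative rather than Trino's source.

import java.util.HashMap;
import java.util.Map;
import java.util.OptionalLong;

public class StatisticsMergeSketch {
    // Prefer the new value when it is present; otherwise keep the old one.
    static OptionalLong firstPresent(OptionalLong first, OptionalLong second) {
        return first.isPresent() ? first : second;
    }

    // Columns present in the new map overwrite the old entries; all other columns are preserved.
    static <T> Map<String, T> updateColumnStatistics(Map<String, T> oldStats, Map<String, T> newStats) {
        Map<String, T> result = new HashMap<>(oldStats);
        result.putAll(newStats);
        return result;
    }

    public static void main(String[] args) {
        // Prints OptionalLong[42]: an empty "new" value would have kept the old one instead.
        System.out.println(firstPresent(OptionalLong.empty(), OptionalLong.of(42)));
        // Keeps a=1 and overwrites b with 20.
        System.out.println(updateColumnStatistics(Map.of("a", 1, "b", 2), Map.of("b", 20)));
    }
}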

Example 20 with PartitionStatistics

Use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

In class SemiTransactionalHiveMetastore, method finishInsertIntoExistingPartition:

public synchronized void finishInsertIntoExistingPartition(ConnectorSession session, String databaseName, String tableName, List<String> partitionValues, Path currentLocation, List<String> fileNames, PartitionStatistics statisticsUpdate, boolean cleanExtraOutputFilesOnCommit) {
    setShared();
    SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName);
    Map<List<String>, Action<PartitionAndMore>> partitionActionsOfTable = partitionActions.computeIfAbsent(schemaTableName, k -> new HashMap<>());
    Action<PartitionAndMore> oldPartitionAction = partitionActionsOfTable.get(partitionValues);
    if (oldPartitionAction == null) {
        Partition partition = delegate.getPartition(databaseName, tableName, partitionValues).orElseThrow(() -> new PartitionNotFoundException(schemaTableName, partitionValues));
        String partitionName = getPartitionName(databaseName, tableName, partitionValues);
        PartitionStatistics currentStatistics = delegate.getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName);
        if (currentStatistics == null) {
            throw new TrinoException(HIVE_METASTORE_ERROR, "currentStatistics is null");
        }
        HdfsContext context = new HdfsContext(session);
        partitionActionsOfTable.put(partitionValues, new Action<>(
                ActionType.INSERT_EXISTING,
                new PartitionAndMore(partition, currentLocation, Optional.of(fileNames), merge(currentStatistics, statisticsUpdate), statisticsUpdate, cleanExtraOutputFilesOnCommit),
                context,
                session.getQueryId()));
        return;
    }
    switch(oldPartitionAction.getType()) {
        case DROP:
        case DROP_PRESERVE_DATA:
            throw new PartitionNotFoundException(schemaTableName, partitionValues);
        case ADD:
        case ALTER:
        case INSERT_EXISTING:
        case DELETE_ROWS:
        case UPDATE:
            throw new UnsupportedOperationException("Inserting into a partition that were added, altered, or inserted into in the same transaction is not supported");
    }
    throw new IllegalStateException("Unknown action type");
}
Also used : SchemaTableName(io.trino.spi.connector.SchemaTableName) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TrinoException(io.trino.spi.TrinoException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext)
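
The merge(currentStatistics, statisticsUpdate) call is not shown in this excerpt. Since inserting into an existing partition adds files and rows, the merged basic statistics presumably add the new counts to the current ones, in contrast to the overwrite semantics of updatePartitionStatistics above; this is an assumption about the excerpt, not a quote from Trino. A sketch of such an additive combination, using plain OptionalLong counters instead of Trino's HiveBasicStatistics (class and method names are hypothetical):

import java.util.OptionalLong;

public class AdditiveMergeSketch {
    // Add two optional counters; the sum is treated as unknown if either side is unknown.
    static OptionalLong add(OptionalLong first, OptionalLong second) {
        if (first.isPresent() && second.isPresent()) {
            return OptionalLong.of(first.getAsLong() + second.getAsLong());
        }
        return OptionalLong.empty();
    }

    public static void main(String[] args) {
        OptionalLong currentRowCount = OptionalLong.of(1_000);
        OptionalLong insertedRowCount = OptionalLong.of(250);
        System.out.println(add(currentRowCount, insertedRowCount));      // OptionalLong[1250]
        System.out.println(add(currentRowCount, OptionalLong.empty()));  // OptionalLong.empty
    }
}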

Aggregations

PartitionStatistics (io.trino.plugin.hive.PartitionStatistics) 36
SchemaTableName (io.trino.spi.connector.SchemaTableName) 21
HiveBasicStatistics (io.trino.plugin.hive.HiveBasicStatistics) 16
HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics) 16
TrinoException (io.trino.spi.TrinoException) 15
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 13
ImmutableMap (com.google.common.collect.ImmutableMap) 13
List (java.util.List) 12
Map (java.util.Map) 11
OptionalLong (java.util.OptionalLong) 11
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument) 10
ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet) 10
TableNotFoundException (io.trino.spi.connector.TableNotFoundException) 10
Type (io.trino.spi.type.Type) 10
ArrayList (java.util.ArrayList) 10
Objects.requireNonNull (java.util.Objects.requireNonNull) 10
Optional (java.util.Optional) 10
Set (java.util.Set) 10
ImmutableList (com.google.common.collect.ImmutableList) 9
PartitionNotFoundException (io.trino.plugin.hive.PartitionNotFoundException) 8