Search in sources :

Example 11 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class MetastoreHiveStatisticsProvider method getPartitionRowCount.

/**
 * Returns the row count recorded in the basic statistics of the named partition.
 *
 * @param partitionName key to look up in {@code statistics}
 * @param statistics per-partition statistics keyed by partition name
 * @return the row count as a double, or empty when the map has no entry for
 *         {@code partitionName} or the entry carries no row count
 */
private static OptionalDouble getPartitionRowCount(String partitionName, Map<String, PartitionStatistics> statistics) {
    PartitionStatistics stats = statistics.get(partitionName);
    if (stats != null) {
        OptionalLong rows = stats.getBasicStatistics().getRowCount();
        if (rows.isPresent()) {
            long count = rows.getAsLong();
            // A negative row count is rejected by verify rather than passed on to callers.
            verify(count >= 0, "rowCount must be greater than or equal to zero");
            return OptionalDouble.of(count);
        }
    }
    // Either the partition is unknown or its row count was never recorded.
    return OptionalDouble.empty();
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong)

Example 12 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class SemiTransactionalHiveMetastore method finishInsertIntoExistingPartition.

/**
 * Records an {@code INSERT_EXISTING} action for the given partition in this transaction's
 * pending partition actions.
 *
 * <p>If an action is already registered for the same partition values the call is rejected:
 * a pending {@code DROP} raises {@link PartitionNotFoundException}; a pending {@code ADD},
 * {@code ALTER} or {@code INSERT_EXISTING} raises {@link UnsupportedOperationException}.
 *
 * @param session session used to build the {@link HiveIdentity} and {@link HdfsContext}
 * @param databaseName database of the target table
 * @param tableName name of the target table
 * @param partitionValues values identifying the target partition
 * @param currentLocation location passed through to the recorded {@code PartitionAndMore}
 * @param fileNames file names passed through to the recorded {@code PartitionAndMore}
 * @param statisticsUpdate statistics describing the inserted data
 * @param acidWriteType write type; consulted for the vacuum flag and for whether statistics
 *        may be updated
 * @throws PartitionNotFoundException if the partition does not exist in the delegate metastore
 *         or is already scheduled to be dropped in this transaction
 * @throws PrestoException if statistics are to be merged but no current statistics are returned
 *         for the partition
 */
public synchronized void finishInsertIntoExistingPartition(ConnectorSession session, String databaseName, String tableName, List<String> partitionValues, Path currentLocation, List<String> fileNames, PartitionStatistics statisticsUpdate, HiveACIDWriteType acidWriteType) {
    setShared();
    isVacuumIncluded |= HiveACIDWriteType.isVacuum(acidWriteType);
    HiveIdentity identity = new HiveIdentity(session);
    SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName);
    Map<List<String>, Action<PartitionAndMore>> actionsForTable = partitionActions.computeIfAbsent(schemaTableName, k -> new LinkedHashMap<>());
    Action<PartitionAndMore> existingAction = actionsForTable.get(partitionValues);

    // Any action already pending on this partition makes the insert invalid; every branch throws.
    if (existingAction != null) {
        switch (existingAction.getType()) {
            case DROP:
                throw new PartitionNotFoundException(schemaTableName, partitionValues);
            case ADD:
            case ALTER:
            case INSERT_EXISTING:
                throw new UnsupportedOperationException("Inserting into a partition that were added, altered, or inserted into in the same transaction is not supported");
            default:
                throw new IllegalStateException("Unknown action type");
        }
    }

    // No pending action: resolve the partition and register the insert.
    Partition partition = delegate.getPartition(identity, databaseName, tableName, partitionValues)
            .orElseThrow(() -> new PartitionNotFoundException(schemaTableName, partitionValues));
    String partitionName = getPartitionName(identity, databaseName, tableName, partitionValues);
    boolean updateStats = canUpdateStats(session, acidWriteType);

    PartitionStatistics mergedStatistics;
    if (updateStats) {
        PartitionStatistics currentStatistics = closure
                .getPartitionStatistics(identity, databaseName, tableName, ImmutableSet.of(partitionName))
                .get(partitionName);
        if (currentStatistics == null) {
            throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "currentStatistics is null");
        }
        mergedStatistics = merge(currentStatistics, statisticsUpdate);
    }
    else {
        // Statistics are not being updated, so the update is recorded as-is.
        mergedStatistics = statisticsUpdate;
    }

    HdfsContext context = new HdfsContext(session, databaseName, tableName);
    PartitionAndMore partitionAndMore = new PartitionAndMore(identity, partition, currentLocation, Optional.of(fileNames), mergedStatistics, statisticsUpdate, updateStats);
    actionsForTable.put(partitionValues, new Action<>(ActionType.INSERT_EXISTING, partitionAndMore, context, identity));
}
Also used : PrestoException(io.prestosql.spi.PrestoException) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext)

Example 13 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class CachingHiveMetastore method loadPartitionColumnStatistics (bulk overload taking an Iterable of partition keys).

/**
 * Loads statistics for a batch of partition keys, grouping them by table so that the table,
 * its partitions and their statistics are each fetched once per table.
 *
 * @param keys partition keys (with identity) whose statistics should be loaded
 * @return a map from each requested key to its statistics, wrapped together with the
 *         validation parameters derived from the table and the basic statistics
 * @throws PrestoException with {@code HIVE_PARTITION_DROPPED_DURING_QUERY} if the delegate's
 *         result has no entry for one of the requested partitions
 */
private Map<WithIdentity<HivePartitionName>, WithValidation<Table, PartitionStatistics>> loadPartitionColumnStatistics(Iterable<? extends WithIdentity<HivePartitionName>> keys) {
    // Group the requested partitions under their (identity, table) pair.
    SetMultimap<WithIdentity<HiveTableName>, WithIdentity<HivePartitionName>> partitionsByTable = stream(keys).collect(toImmutableSetMultimap(value -> new WithIdentity<>(value.getIdentity(), value.getKey().getHiveTableName()), key -> key));
    ImmutableMap.Builder<WithIdentity<HivePartitionName>, WithValidation<Table, PartitionStatistics>> loaded = ImmutableMap.builder();

    for (WithIdentity<HiveTableName> tableKey : partitionsByTable.keySet()) {
        HiveIdentity identity = tableKey.getIdentity();
        Set<WithIdentity<HivePartitionName>> requested = partitionsByTable.get(tableKey);
        Set<String> requestedNames = requested.stream()
                .map(entry -> entry.getKey().getPartitionName().get())
                .collect(toImmutableSet());

        Table table = getExistingTable(identity, tableKey.getKey().getDatabaseName(), tableKey.getKey().getTableName());
        List<Partition> partitions = getExistingPartitionsByNames(identity, table, ImmutableList.copyOf(requestedNames));
        Map<String, PartitionStatistics> statisticsByName = delegate.getPartitionStatistics(identity, table, partitions);

        for (WithIdentity<HivePartitionName> entry : requested) {
            String name = entry.getKey().getPartitionName().get();
            PartitionStatistics statistics = statisticsByName.get(name);
            if (statistics == null) {
                throw new PrestoException(HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + name);
            }
            loaded.put(entry, new WithValidation<>(getCacheValidationPartitionParams(table, statistics.getBasicStatistics()), statistics));
        }
    }
    return loaded.build();
}
Also used : HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) LoadingCache(com.google.common.cache.LoadingCache) Iterables.transform(com.google.common.collect.Iterables.transform) ShowLocksResponse(org.apache.hadoop.hive.metastore.api.ShowLocksResponse) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) RoleGrant(io.prestosql.spi.security.RoleGrant) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) Map(java.util.Map) HivePartitionManager.extractPartitionValues(io.prestosql.plugin.hive.HivePartitionManager.extractPartitionValues) Type(io.prestosql.spi.type.Type) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) HiveType(io.prestosql.plugin.hive.HiveType) ShowLocksRequest(org.apache.hadoop.hive.metastore.api.ShowLocksRequest) HivePartition(io.prestosql.plugin.hive.HivePartition) CacheLoader(com.google.common.cache.CacheLoader) Objects(java.util.Objects) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Entry(java.util.Map.Entry) ThriftMetastoreUtil(io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Iterables(com.google.common.collect.Iterables) Logger(io.airlift.log.Logger) 
ImmutableSetMultimap.toImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap.toImmutableSetMultimap) MoreExecutors.newDirectExecutorService(com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) MetastoreUtil.makePartitionName(io.prestosql.plugin.hive.metastore.MetastoreUtil.makePartitionName) ArrayList(java.util.ArrayList) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) Inject(javax.inject.Inject) OptionalLong(java.util.OptionalLong) ForCachingHiveMetastoreTableRefresh(io.prestosql.plugin.hive.ForCachingHiveMetastoreTableRefresh) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) Managed(org.weakref.jmx.Managed) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) HivePartitionName.hivePartitionName(io.prestosql.plugin.hive.metastore.HivePartitionName.hivePartitionName) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) ForCachingHiveMetastore(io.prestosql.plugin.hive.ForCachingHiveMetastore) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) Executor(java.util.concurrent.Executor) HiveTableName.hiveTableName(io.prestosql.plugin.hive.metastore.HiveTableName.hiveTableName) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) SetMultimap(com.google.common.collect.SetMultimap) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) NodeManager(io.prestosql.spi.NodeManager) CacheLoader.asyncReloading(com.google.common.cache.CacheLoader.asyncReloading) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) 
HivePartition(io.prestosql.plugin.hive.HivePartition) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics)

Example 14 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class CachingHiveMetastore method loadPartitionColumnStatistics (single-partition overload).

/**
 * Loads the statistics of a single partition from the delegate metastore.
 *
 * @param partition partition key together with the identity used for the metastore calls
 * @return the partition's statistics, wrapped together with the validation parameters derived
 *         from the table and the basic statistics
 * @throws PrestoException with {@code HIVE_PARTITION_DROPPED_DURING_QUERY} if the delegate's
 *         result has no statistics for the requested partition
 */
private WithValidation<Table, PartitionStatistics> loadPartitionColumnStatistics(WithIdentity<HivePartitionName> partition) {
    HiveTableName hiveTableName = partition.getKey().getHiveTableName();
    HiveIdentity identity = partition.getIdentity();
    Table table = getExistingTable(identity, hiveTableName.getDatabaseName(), hiveTableName.getTableName());
    String partitionName = partition.getKey().getPartitionName().get();
    Map<String, PartitionStatistics> partitionStatistics = delegate.getPartitionStatistics(identity, table, ImmutableList.of(getExistingPartition(identity, table, partition.getKey().getPartitionValues())));
    // Single lookup with a null check, mirroring the bulk overload; a missing or null entry
    // yields the descriptive error instead of a later NullPointerException.
    PartitionStatistics value = partitionStatistics.get(partitionName);
    if (value == null) {
        // Report the unwrapped name: concatenating the Optional itself would render as "Optional[...]".
        throw new PrestoException(HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName);
    }
    return new WithValidation<>(getCacheValidationPartitionParams(table, value.getBasicStatistics()), value);
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity)

Example 15 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class GlueHiveMetastore method updatePartitionStatistics.

/**
 * Applies {@code update} to the partition's current statistics and writes the resulting basic
 * statistics back to Glue as partition parameters.
 *
 * @param identity identity used to look up the partition
 * @param databaseName database of the table that owns the partition
 * @param tableName table that owns the partition
 * @param partitionName partition name, converted to partition values for the lookup
 * @param update function producing the new statistics from the current ones
 * @throws PrestoException with {@code HIVE_PARTITION_DROPPED_DURING_QUERY} if the partition
 *         cannot be found, with {@code NOT_SUPPORTED} if the updated statistics contain column
 *         statistics, or with {@code HIVE_METASTORE_ERROR} on an {@link AmazonServiceException}
 * @throws PartitionNotFoundException if Glue reports the entity as missing during the update
 */
@Override
public void updatePartitionStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<String> partitionValues = toPartitionValues(partitionName);
    // NOTE(review): the message talks about a "statistics result" although this is a partition lookup — confirm wording.
    Partition partition = getPartition(identity, databaseName, tableName, partitionValues)
            .orElseThrow(() -> new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName));

    PartitionStatistics updatedStatistics = update.apply(getPartitionStatistics(partition));
    boolean hasColumnStatistics = !updatedStatistics.getColumnStatistics().isEmpty();
    if (hasColumnStatistics) {
        throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
    }

    try {
        PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
        // Only basic statistics are persisted, folded into the partition's parameter map.
        partitionInput.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics()));

        UpdatePartitionRequest request = new UpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(databaseName)
                .withTableName(tableName)
                .withPartitionValueList(partition.getValues())
                .withPartitionInput(partitionInput);
        glueClient.updatePartition(request);
    }
    catch (EntityNotFoundException e) {
        throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues);
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) AmazonServiceException(com.amazonaws.AmazonServiceException) PrestoException(io.prestosql.spi.PrestoException) UpdatePartitionRequest(com.amazonaws.services.glue.model.UpdatePartitionRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) PartitionInput(com.amazonaws.services.glue.model.PartitionInput)

Aggregations

PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics)62 PrestoException (io.prestosql.spi.PrestoException)32 HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)31 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)31 HivePartition (io.prestosql.plugin.hive.HivePartition)20 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)18 TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException)18 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)16 ImmutableMap (com.google.common.collect.ImmutableMap)16 PartitionNotFoundException (io.prestosql.plugin.hive.PartitionNotFoundException)16 ArrayList (java.util.ArrayList)15 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)14 List (java.util.List)14 OptionalLong (java.util.OptionalLong)14 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)12 ImmutableList (com.google.common.collect.ImmutableList)12 Logger (io.airlift.log.Logger)12 HiveErrorCode (io.prestosql.plugin.hive.HiveErrorCode)12 Type (io.prestosql.spi.type.Type)12 Map (java.util.Map)12