Usage of io.trino.plugin.hive.HivePartition in the Trino project (trinodb):
class ThriftHiveMetastore, method acquireSharedLock.
/**
 * Acquires a shared metastore lock for the given transaction, covering every listed
 * table (whole-table lock) and every listed partition (partition-scoped lock).
 * Returns without contacting the metastore when there is nothing to lock.
 */
private void acquireSharedLock(HiveIdentity identity, AcidTransactionOwner transactionOwner, String queryId, long transactionId, List<SchemaTableName> fullTables, List<HivePartition> partitions, DataOperationType operation, boolean isDynamicPartitionWrite)
{
    requireNonNull(operation, "operation is null");
    requireNonNull(transactionOwner, "transactionOwner is null");
    requireNonNull(queryId, "queryId is null");

    // Nothing to lock — skip the metastore round trip entirely
    if (fullTables.isEmpty() && partitions.isEmpty()) {
        return;
    }

    LockRequestBuilder builder = new LockRequestBuilder(queryId)
            .setTransactionId(transactionId)
            .setUser(transactionOwner.toString());

    // Whole-table components carry no partition id; partition components do
    fullTables.forEach(table ->
            builder.addLockComponent(createLockComponentForOperation(table, operation, isDynamicPartitionWrite, Optional.empty())));
    partitions.forEach(partition ->
            builder.addLockComponent(createLockComponentForOperation(partition.getTableName(), operation, isDynamicPartitionWrite, Optional.of(partition.getPartitionId()))));

    acquireLock(identity, format("hive transaction %s for query %s", transactionId, queryId), builder.build());
}
Usage of io.trino.plugin.hive.HivePartition in the Trino project (trinodb):
class MetastoreHiveStatisticsProvider, method calculateDataSizeForPartitioningKey.
/**
 * Estimates the total data size contributed by a partitioning key column: for each
 * partition, the serialized size of its key value times the partition's row count
 * (falling back to the average row count when per-partition statistics are absent).
 * Returns an unknown estimate for types that have no meaningful data size.
 */
@VisibleForTesting
static Estimate calculateDataSizeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics, double averageRowsPerPartition)
{
    if (!hasDataSize(type)) {
        return Estimate.unknown();
    }
    double totalDataSize = partitions.stream()
            .mapToDouble(partition -> {
                double rowCount = getPartitionRowCount(partition.getPartitionId(), statistics).orElse(averageRowsPerPartition);
                return getSize(partition.getKeys().get(column)) * rowCount;
            })
            .sum();
    return Estimate.of(totalDataSize);
}
Usage of io.trino.plugin.hive.HivePartition in the Trino project (trinodb):
class HiveTransaction, method getValidWriteIds.
/**
 * Returns the list of valid write ids for the given table within this transaction,
 * first acquiring a shared read lock on either the whole table (when the table is
 * unpartitioned or no partition list is available) or the enumerated partitions.
 */
public ValidTxnWriteIdList getValidWriteIds(AcidTransactionOwner transactionOwner, HiveMetastoreClosure metastore, HiveTableHandle tableHandle)
{
    boolean lockWholeTable = tableHandle.getPartitionColumns().isEmpty() || tableHandle.getPartitions().isEmpty();
    List<SchemaTableName> lockedTables = lockWholeTable
            ? ImmutableList.of(tableHandle.getSchemaTableName())
            : ImmutableList.of();
    List<HivePartition> lockedPartitions = lockWholeTable
            ? ImmutableList.of()
            : tableHandle.getPartitions().get();

    // Successive calls against the same table may touch different partitions,
    // so the lock is (re)acquired on every invocation
    metastore.acquireSharedReadLock(transactionOwner, queryId, transactionId, lockedTables, lockedPartitions);

    // Cache the valid-transaction list per table so reads within one query are repeatable
    return validHiveTransactionsForTable.computeIfAbsent(
            tableHandle.getSchemaTableName(),
            schemaTableName -> new ValidTxnWriteIdList(metastore.getValidWriteIds(ImmutableList.of(schemaTableName), transactionId)));
}
Usage of io.trino.plugin.hive.HivePartition in the Trino project (trinodb):
class MetastoreHiveStatisticsProvider, method getPartitionsSample.
/**
 * Deterministically samples at most {@code sampleSize} partitions. The partitions
 * with the smallest and largest partition ids are always included (so the sample
 * spans the full key range); the remainder are chosen by ascending murmur3 hash of
 * the partition id, with ties broken by the id itself, making the selection stable
 * across calls for the same input.
 */
@VisibleForTesting
static List<HivePartition> getPartitionsSample(List<HivePartition> partitions, int sampleSize)
{
    checkArgument(sampleSize > 0, "sampleSize is expected to be greater than zero");
    if (partitions.size() <= sampleSize) {
        return partitions;
    }

    List<HivePartition> sample = new ArrayList<>();

    // Locate the extreme partitions by id in a single pass
    HivePartition smallest = partitions.get(0);
    HivePartition largest = partitions.get(0);
    for (HivePartition candidate : partitions) {
        String candidateId = candidate.getPartitionId();
        if (candidateId.compareTo(smallest.getPartitionId()) < 0) {
            smallest = candidate;
        }
        else if (candidateId.compareTo(largest.getPartitionId()) > 0) {
            largest = candidate;
        }
    }

    sample.add(smallest);
    int remaining = sampleSize - 1;
    if (remaining > 0) {
        sample.add(largest);
        remaining--;
    }

    if (remaining > 0) {
        // Fill the rest by lowest hash value; the id tiebreaker keeps the order total
        HashFunction hashFunction = murmur3_128();
        Comparator<Map.Entry<HivePartition, Long>> byHashThenId =
                Comparator.<Map.Entry<HivePartition, Long>, Long>comparing(Map.Entry::getValue)
                        .thenComparing(entry -> entry.getKey().getPartitionId());
        partitions.stream()
                .filter(partition -> !sample.contains(partition))
                .map(partition -> immutableEntry(partition, hashFunction.hashUnencodedChars(partition.getPartitionId()).asLong()))
                .sorted(byHashThenId)
                .limit(remaining)
                .forEachOrdered(entry -> sample.add(entry.getKey()));
    }
    return unmodifiableList(sample);
}
Usage of io.trino.plugin.hive.HivePartition in the Trino project (trinodb):
class MetastoreHiveStatisticsProvider, method calculateRangeForPartitioningKey.
/**
 * Derives a numeric [min, max] range for a partitioning key column from the
 * partitions' key values. Null key values are skipped; the result is empty when no
 * value converts to a double or when any converted value is NaN.
 */
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions)
{
    List<OptionalDouble> convertedValues = partitions.stream()
            .map(partition -> partition.getKeys().get(column))
            .filter(value -> !value.isNull())
            .map(value -> convertPartitionValueToDouble(type, value.getValue()))
            .collect(toImmutableList());

    if (convertedValues.stream().noneMatch(OptionalDouble::isPresent)) {
        return Optional.empty();
    }
    // Conversion is all-or-none for a given type, so every surviving value must be present
    double[] values = convertedValues.stream()
            .peek(convertedValue -> checkState(convertedValue.isPresent(), "convertedValue is missing"))
            .mapToDouble(OptionalDouble::getAsDouble)
            .toArray();
    verify(values.length != 0, "No values");

    if (DoubleStream.of(values).anyMatch(Double::isNaN)) {
        return Optional.empty();
    }

    // Single pass over the array instead of two separate stream reductions
    double min = values[0];
    double max = values[0];
    for (double value : values) {
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    return Optional.of(new DoubleRange(min, max));
}
Aggregations