use of io.trino.spi.predicate.NullableValue in project trino by trinodb.
the class MetastoreHiveStatisticsProvider method calculateRangeForPartitioningKey.
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions) {
List<OptionalDouble> convertedValues = partitions.stream().map(HivePartition::getKeys).map(keys -> keys.get(column)).filter(value -> !value.isNull()).map(NullableValue::getValue).map(value -> convertPartitionValueToDouble(type, value)).collect(toImmutableList());
if (convertedValues.stream().noneMatch(OptionalDouble::isPresent)) {
return Optional.empty();
}
double[] values = convertedValues.stream().peek(convertedValue -> checkState(convertedValue.isPresent(), "convertedValue is missing")).mapToDouble(OptionalDouble::getAsDouble).toArray();
verify(values.length != 0, "No values");
if (DoubleStream.of(values).anyMatch(Double::isNaN)) {
return Optional.empty();
}
double min = DoubleStream.of(values).min().orElseThrow();
double max = DoubleStream.of(values).max().orElseThrow();
return Optional.of(new DoubleRange(min, max));
}
use of io.trino.spi.predicate.NullableValue in project trino by trinodb.
the class IcebergSplitSource method getNextBatch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
if (dynamicFilter.isAwaitable() && timeLeft > 0) {
return dynamicFilter.isBlocked().thenApply(ignored -> EMPTY_BATCH).completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
}
if (combinedScanIterable == null) {
// Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
// TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
if (!simplifiedPredicate.equals(fullPredicate)) {
// Pushed down predicate was simplified, always evaluate it against individual splits
this.pushedDownDynamicFilterPredicate = TupleDomain.all();
}
TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
if (effectivePredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Expression filterExpression = toIcebergExpression(effectivePredicate);
this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
this.fileScanIterator = Streams.stream(combinedScanIterable).map(CombinedScanTask::files).flatMap(Collection::stream).iterator();
}
TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
if (dynamicFilterPredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
while (fileScanTasks.hasNext()) {
FileScanTask scanTask = fileScanTasks.next();
if (!scanTask.deletes().isEmpty()) {
throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
}
if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
continue;
}
IcebergSplit icebergSplit = toIcebergSplit(scanTask);
Schema fileSchema = scanTask.spec().schema();
Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream().map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)).collect(toImmutableSet());
Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
Object partitionValue = deserializePartitionValue(partitionColumn.getType(), icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null), partitionColumn.getName());
NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
bindings.put(partitionColumn, bindingValue);
}
return bindings;
});
if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
continue;
}
if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
continue;
}
}
if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
continue;
}
if (recordScannedFiles) {
scannedFiles.add(scanTask.file());
}
splits.add(icebergSplit);
}
return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
use of io.trino.spi.predicate.NullableValue in project trino by trinodb.
the class PlanPrinter method formatFragment.
private static String formatFragment(Function<TableScanNode, TableInfo> tableInfoSupplier, Map<DynamicFilterId, DynamicFilterDomainStats> dynamicFilterDomainStats, ValuePrinter valuePrinter, PlanFragment fragment, Optional<StageInfo> stageInfo, Optional<Map<PlanNodeId, PlanNodeStats>> planNodeStats, boolean verbose, TypeProvider typeProvider) {
StringBuilder builder = new StringBuilder();
builder.append(format("Fragment %s [%s]\n", fragment.getId(), fragment.getPartitioning()));
if (stageInfo.isPresent()) {
StageStats stageStats = stageInfo.get().getStageStats();
double avgPositionsPerTask = stageInfo.get().getTasks().stream().mapToLong(task -> task.getStats().getProcessedInputPositions()).average().orElse(Double.NaN);
double squaredDifferences = stageInfo.get().getTasks().stream().mapToDouble(task -> Math.pow(task.getStats().getProcessedInputPositions() - avgPositionsPerTask, 2)).sum();
double sdAmongTasks = Math.sqrt(squaredDifferences / stageInfo.get().getTasks().size());
builder.append(indentString(1)).append(format("CPU: %s, Scheduled: %s, Input: %s (%s); per task: avg.: %s std.dev.: %s, Output: %s (%s)\n", stageStats.getTotalCpuTime().convertToMostSuccinctTimeUnit(), stageStats.getTotalScheduledTime().convertToMostSuccinctTimeUnit(), formatPositions(stageStats.getProcessedInputPositions()), stageStats.getProcessedInputDataSize(), formatDouble(avgPositionsPerTask), formatDouble(sdAmongTasks), formatPositions(stageStats.getOutputPositions()), stageStats.getOutputDataSize()));
}
PartitioningScheme partitioningScheme = fragment.getPartitioningScheme();
builder.append(indentString(1)).append(format("Output layout: [%s]\n", Joiner.on(", ").join(partitioningScheme.getOutputLayout())));
boolean replicateNullsAndAny = partitioningScheme.isReplicateNullsAndAny();
List<String> arguments = partitioningScheme.getPartitioning().getArguments().stream().map(argument -> {
if (argument.isConstant()) {
NullableValue constant = argument.getConstant();
String printableValue = valuePrinter.castToVarchar(constant.getType(), constant.getValue());
return constant.getType().getDisplayName() + "(" + printableValue + ")";
}
return argument.getColumn().toString();
}).collect(toImmutableList());
builder.append(indentString(1));
if (replicateNullsAndAny) {
builder.append(format("Output partitioning: %s (replicate nulls and any) [%s]%s\n", partitioningScheme.getPartitioning().getHandle(), Joiner.on(", ").join(arguments), formatHash(partitioningScheme.getHashColumn())));
} else {
builder.append(format("Output partitioning: %s [%s]%s\n", partitioningScheme.getPartitioning().getHandle(), Joiner.on(", ").join(arguments), formatHash(partitioningScheme.getHashColumn())));
}
builder.append(indentString(1)).append(format("Stage Execution Strategy: %s\n", fragment.getStageExecutionDescriptor().getStageExecutionStrategy()));
builder.append(new PlanPrinter(fragment.getRoot(), typeProvider, Optional.of(fragment.getStageExecutionDescriptor()), tableInfoSupplier, dynamicFilterDomainStats, valuePrinter, fragment.getStatsAndCosts(), planNodeStats).toText(verbose, 1)).append("\n");
return builder.toString();
}
use of io.trino.spi.predicate.NullableValue in project trino by trinodb.
the class AbstractTestHive method testBucketedTableStringInt.
@Test
public void testBucketedTableStringInt() throws Exception {
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedStringInt);
List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
Map<String, Integer> columnIndex = indexColumns(columnHandles);
assertTableIsBucketed(tableHandle, transaction, session);
String testString = "test";
Integer testInt = 13;
Short testSmallint = 12;
// Reverse the order of bindings as compared to bucketing order
ImmutableMap<ColumnHandle, NullableValue> bindings = ImmutableMap.<ColumnHandle, NullableValue>builder().put(columnHandles.get(columnIndex.get("t_int")), NullableValue.of(INTEGER, (long) testInt)).put(columnHandles.get(columnIndex.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString))).put(columnHandles.get(columnIndex.get("t_smallint")), NullableValue.of(SMALLINT, (long) testSmallint)).buildOrThrow();
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty());
boolean rowFound = false;
for (MaterializedRow row : result) {
if (testString.equals(row.getField(columnIndex.get("t_string"))) && testInt.equals(row.getField(columnIndex.get("t_int"))) && testSmallint.equals(row.getField(columnIndex.get("t_smallint")))) {
rowFound = true;
}
}
assertTrue(rowFound);
}
}
use of io.trino.spi.predicate.NullableValue in project trino by trinodb.
the class TestPartitionedOutputOperator method testOutputForSimplePageWithPartitionConstant.
@Test
public void testOutputForSimplePageWithPartitionConstant() {
PartitionedOutputOperator partitionedOutputOperator = partitionedOutputOperator(BIGINT).withPartitionConstants(ImmutableList.of(Optional.of(new NullableValue(BIGINT, 1L)))).withPartitionChannels(-1).build();
Page page = new Page(createLongsBlock(0L, 1L, 2L, 3L, null));
List<Object> allValues = readLongs(Stream.of(page), 0);
processPages(partitionedOutputOperator, page);
List<Object> partition0 = readLongs(outputBuffer.getEnqueuedDeserialized(0), 0);
assertThat(partition0).isEmpty();
List<Object> partition1 = readLongs(outputBuffer.getEnqueuedDeserialized(1), 0);
assertThat(partition1).containsExactlyElementsOf(allValues);
}
Aggregations