Use of io.prestosql.spi.plan.PlanNodeId in project hetu-core by openlookeng.
The class TestDetermineSemiJoinDistributionType, method testPartitionsWhenBothTablesEqual.
@Test
public void testPartitionsWhenBothTablesEqual() {
    int aRows = 10_000;
    int bRows = 10_000;
    assertDetermineSemiJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(aRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), SymbolStatsEstimate.unknown()))
                    .build())
            .overrideStats("valuesB", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(bRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), SymbolStatsEstimate.unknown()))
                    .build())
            .on(p -> p.semiJoin(
                    p.values(new PlanNodeId("valuesA"), aRows, p.symbol("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.symbol("B1", BIGINT)),
                    p.symbol("A1"),
                    p.symbol("B1"),
                    p.symbol("output"),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty()))
            .matches(semiJoin("A1", "B1", "output",
                    Optional.of(PARTITIONED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
Use of io.prestosql.spi.plan.PlanNodeId in project hetu-core by openlookeng.
The class TestDetermineSemiJoinDistributionType, method testReplicatesWhenFilterMuchSmaller.
@Test
public void testReplicatesWhenFilterMuchSmaller() {
    int aRows = 10_000;
    int bRows = 100;
    assertDetermineSemiJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(aRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), SymbolStatsEstimate.unknown()))
                    .build())
            .overrideStats("valuesB", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(bRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), SymbolStatsEstimate.unknown()))
                    .build())
            .on(p -> p.semiJoin(
                    p.values(new PlanNodeId("valuesA"), aRows, p.symbol("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.symbol("B1", BIGINT)),
                    p.symbol("A1"),
                    p.symbol("B1"),
                    p.symbol("output"),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty()))
            .matches(semiJoin("A1", "B1", "output",
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
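Taken together, the two tests above exercise the size-sensitive choice made by DetermineSemiJoinDistributionType: when the probe and filtering sources have comparable row counts the semi join stays PARTITIONED, while a filtering source that is much smaller than the probe is cheap to broadcast and becomes REPLICATED. The following is only a minimal, self-contained sketch of that kind of size heuristic; the class name, the choose method, and the BROADCAST_RATIO threshold are invented for illustration and do not reflect the rule's actual cost model.

public final class SemiJoinDistributionSketch {
    public enum DistributionType { PARTITIONED, REPLICATED }

    // Illustrative threshold: broadcast only when the filtering side is at most 1% of the probe side.
    private static final double BROADCAST_RATIO = 0.01;

    // Hypothetical helper; the real rule works on cost estimates, not a raw row-count ratio.
    public static DistributionType choose(double probeRows, double filteringRows) {
        if (Double.isNaN(probeRows) || Double.isNaN(filteringRows)) {
            // Unknown statistics: partitioning is the safe default.
            return DistributionType.PARTITIONED;
        }
        return filteringRows <= probeRows * BROADCAST_RATIO
                ? DistributionType.REPLICATED
                : DistributionType.PARTITIONED;
    }

    public static void main(String[] args) {
        System.out.println(choose(10_000, 10_000)); // PARTITIONED, as in testPartitionsWhenBothTablesEqual
        System.out.println(choose(10_000, 100));    // REPLICATED, as in testReplicatesWhenFilterMuchSmaller
    }
}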
Use of io.prestosql.spi.plan.PlanNodeId in project hetu-core by openlookeng.
The class StatsAndCosts, method create.
public static StatsAndCosts create(PlanNode root, StatsProvider statsProvider, CostProvider costProvider) {
    Iterable<PlanNode> planIterator = Traverser.forTree(PlanNode::getSources).depthFirstPreOrder(root);
    ImmutableMap.Builder<PlanNodeId, PlanNodeStatsEstimate> tmpStats = ImmutableMap.builder();
    ImmutableMap.Builder<PlanNodeId, PlanCostEstimate> tmpCosts = ImmutableMap.builder();
    Set<PlanNodeId> visitedPlanNodeId = new HashSet<>();
    for (PlanNode node : planIterator) {
        PlanNodeId id = node.getId();
        if (visitedPlanNodeId.contains(id)) {
            // This can happen only in case of CTE reuse optimization.
            continue;
        }
        visitedPlanNodeId.add(id);
        tmpStats.put(id, statsProvider.getStats(node));
        tmpCosts.put(id, costProvider.getCost(node));
    }
    return new StatsAndCosts(tmpStats.build(), tmpCosts.build());
}
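create walks the plan with Guava's Traverser in depth-first pre-order and records one stats entry and one cost entry per PlanNodeId; the visited set is needed because, with CTE reuse optimization, the same node can be reached through more than one parent, and forTree does not deduplicate on its own. Below is a toy, self-contained illustration of that traverse-and-deduplicate pattern; the Node record is a hypothetical stand-in for PlanNode, not a Presto type.

import com.google.common.graph.Traverser;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class PreOrderDedupSketch {
    // Hypothetical stand-in for PlanNode: an id plus child nodes.
    record Node(String id, List<Node> sources) {}

    public static void main(String[] args) {
        Node shared = new Node("cte", List.of());       // subtree reachable twice, as with CTE reuse
        Node left = new Node("left", List.of(shared));
        Node right = new Node("right", List.of(shared));
        Node root = new Node("root", List.of(left, right));

        Set<String> visited = new HashSet<>();
        List<String> preOrder = new ArrayList<>();
        // forTree assumes a tree, so the shared node is yielded once per parent;
        // the visited set keeps only the first occurrence, mirroring StatsAndCosts.create.
        for (Node node : Traverser.<Node>forTree(Node::sources).depthFirstPreOrder(root)) {
            if (!visited.add(node.id())) {
                continue; // skip the second visit to the shared node
            }
            preOrder.add(node.id());
        }
        System.out.println(preOrder); // [root, left, cte, right]
    }
}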
Use of io.prestosql.spi.plan.PlanNodeId in project hetu-core by openlookeng.
The class PlanNodeStatsSummarizer, method getPlanNodeStats.
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
    // Best effort to reconstruct the plan nodes from operators.
    // Because stats are collected separately from query execution,
    // it's possible that some or all of them are missing or out of date.
    // For example, a LIMIT clause can cause a query to finish before stats
    // are collected from the leaf stages.
    Set<PlanNodeId> planNodeIds = new HashSet<>();
    Map<PlanNodeId, Long> planNodeInputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeInputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeScheduledMillis = new HashMap<>();
    Map<PlanNodeId, Long> planNodeCpuMillis = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorInputStats>> operatorInputStats = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> operatorHashCollisionsStats = new HashMap<>();
    Map<PlanNodeId, WindowOperatorStats> windowNodeStats = new HashMap<>();
    for (PipelineStats pipelineStats : taskStats.getPipelines()) {
        // Stats are collected in an eventually consistent way, so these could be empty
        if (pipelineStats.getOperatorSummaries().isEmpty()) {
            continue;
        }
        Set<PlanNodeId> processedNodes = new HashSet<>();
        PlanNodeId inputPlanNode = pipelineStats.getOperatorSummaries().iterator().next().getPlanNodeId();
        PlanNodeId outputPlanNode = getLast(pipelineStats.getOperatorSummaries()).getPlanNodeId();
        // Gather input statistics
        for (OperatorStats operatorStats : pipelineStats.getOperatorSummaries()) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();
            planNodeIds.add(planNodeId);
            long scheduledMillis = operatorStats.getAddInputWall().toMillis()
                    + operatorStats.getGetOutputWall().toMillis()
                    + operatorStats.getFinishWall().toMillis();
            planNodeScheduledMillis.merge(planNodeId, scheduledMillis, Long::sum);
            long cpuMillis = operatorStats.getAddInputCpu().toMillis()
                    + operatorStats.getGetOutputCpu().toMillis()
                    + operatorStats.getFinishCpu().toMillis();
            planNodeCpuMillis.merge(planNodeId, cpuMillis, Long::sum);
            // A pipeline such as a hash build before a join may link to other "internal" pipelines
            // that provide the actual input for this plan node
            if (operatorStats.getPlanNodeId().equals(inputPlanNode) && !pipelineStats.isInputPipeline()) {
                continue;
            }
            if (processedNodes.contains(planNodeId)) {
                continue;
            }
            operatorInputStats.merge(planNodeId,
                    ImmutableMap.of(
                            operatorStats.getOperatorType(),
                            new OperatorInputStats(
                                    operatorStats.getTotalDrivers(),
                                    operatorStats.getInputPositions(),
                                    operatorStats.getSumSquaredInputPositions())),
                    (map1, map2) -> mergeMaps(map1, map2, OperatorInputStats::merge));
            if (operatorStats.getInfo() instanceof HashCollisionsInfo) {
                HashCollisionsInfo hashCollisionsInfo = (HashCollisionsInfo) operatorStats.getInfo();
                operatorHashCollisionsStats.merge(planNodeId,
                        ImmutableMap.of(
                                operatorStats.getOperatorType(),
                                new OperatorHashCollisionsStats(
                                        hashCollisionsInfo.getWeightedHashCollisions(),
                                        hashCollisionsInfo.getWeightedSumSquaredHashCollisions(),
                                        hashCollisionsInfo.getWeightedExpectedHashCollisions())),
                        (map1, map2) -> mergeMaps(map1, map2, OperatorHashCollisionsStats::merge));
            }
            // The only statistics we have for window functions are very low level, thus displayed only in VERBOSE mode
            if (operatorStats.getInfo() instanceof WindowInfo) {
                WindowInfo windowInfo = (WindowInfo) operatorStats.getInfo();
                windowNodeStats.merge(planNodeId, WindowOperatorStats.create(windowInfo), (left, right) -> left.mergeWith(right));
            }
            planNodeInputPositions.merge(planNodeId, operatorStats.getInputPositions(), Long::sum);
            planNodeInputBytes.merge(planNodeId, operatorStats.getInputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }
        // Gather output statistics
        processedNodes.clear();
        for (OperatorStats operatorStats : reverse(pipelineStats.getOperatorSummaries())) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();
            // An "internal" pipeline, such as a hash build, links to another pipeline which is the actual output for this plan node
            if (operatorStats.getPlanNodeId().equals(outputPlanNode) && !pipelineStats.isOutputPipeline()) {
                continue;
            }
            if (processedNodes.contains(planNodeId)) {
                continue;
            }
            planNodeOutputPositions.merge(planNodeId, operatorStats.getOutputPositions(), Long::sum);
            planNodeOutputBytes.merge(planNodeId, operatorStats.getOutputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }
    }
    List<PlanNodeStats> stats = new ArrayList<>();
    for (PlanNodeId planNodeId : planNodeIds) {
        if (!planNodeInputPositions.containsKey(planNodeId)) {
            continue;
        }
        PlanNodeStats nodeStats;
        // It's possible there will be no output stats because all the pipelines that we observed were non-output.
        // For example, in a query like SELECT * FROM a JOIN b ON c = d LIMIT 1
        // it's possible to observe stats after the build starts but before the probe does,
        // and therefore to have only scheduled time and no output stats.
        long outputPositions = planNodeOutputPositions.getOrDefault(planNodeId, 0L);
        if (operatorHashCollisionsStats.containsKey(planNodeId)) {
            nodeStats = new HashCollisionPlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId),
                    operatorHashCollisionsStats.get(planNodeId));
        } else if (windowNodeStats.containsKey(planNodeId)) {
            nodeStats = new WindowPlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId),
                    windowNodeStats.get(planNodeId));
        } else {
            nodeStats = new PlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId));
        }
        stats.add(nodeStats);
    }
    return stats;
}
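Throughout the method, per-node counters are accumulated with Map.merge, and per-operator statistics maps are folded together with a mergeMaps helper that combines the values for operator types present in both maps. A generic sketch of such a helper follows; its signature is an assumption made for illustration and is not necessarily identical to the one used in PlanNodeStatsSummarizer.

import java.util.HashMap;
import java.util.Map;
import java.util.function.BinaryOperator;

public final class MergeMapsSketch {
    // Combine two maps value-wise: keys present in both maps are merged with the given function.
    static <K, V> Map<K, V> mergeMaps(Map<K, V> left, Map<K, V> right, BinaryOperator<V> merger) {
        Map<K, V> result = new HashMap<>(left);
        right.forEach((key, value) -> result.merge(key, value, merger));
        return result;
    }

    public static void main(String[] args) {
        // Hypothetical per-operator counts reported by two pipelines for the same plan node.
        Map<String, Long> first = Map.of("ScanFilterAndProjectOperator", 1_000L);
        Map<String, Long> second = Map.of("ScanFilterAndProjectOperator", 2_500L, "TaskOutputOperator", 400L);
        // ScanFilterAndProjectOperator -> 3500, TaskOutputOperator -> 400
        System.out.println(mergeMaps(first, second, Long::sum));
    }
}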
Use of io.prestosql.spi.plan.PlanNodeId in project hetu-core by openlookeng.
The class PlanPrinter, method formatFragment.
private static String formatFragment(
        Function<TableScanNode, TableInfo> tableInfoSupplier,
        ValuePrinter valuePrinter,
        PlanFragment fragment,
        Optional<StageInfo> stageInfo,
        Optional<Map<PlanNodeId, PlanNodeStats>> planNodeStats,
        boolean verbose,
        List<PlanFragment> allFragments,
        Metadata metadata) {
    StringBuilder builder = new StringBuilder();
    builder.append(format("Fragment %s [%s]\n", fragment.getId(), fragment.getPartitioning()));
    if (stageInfo.isPresent()) {
        StageStats stageStats = stageInfo.get().getStageStats();
        double avgPositionsPerTask = stageInfo.get().getTasks().stream()
                .mapToLong(task -> task.getStats().getProcessedInputPositions())
                .average()
                .orElse(Double.NaN);
        double squaredDifferences = stageInfo.get().getTasks().stream()
                .mapToDouble(task -> Math.pow(task.getStats().getProcessedInputPositions() - avgPositionsPerTask, 2))
                .sum();
        double sdAmongTasks = Math.sqrt(squaredDifferences / stageInfo.get().getTasks().size());
        builder.append(indentString(1))
                .append(format("CPU: %s, Scheduled: %s, Input: %s (%s); per task: avg.: %s std.dev.: %s, Output: %s (%s)\n",
                        stageStats.getTotalCpuTime().convertToMostSuccinctTimeUnit(),
                        stageStats.getTotalScheduledTime().convertToMostSuccinctTimeUnit(),
                        formatPositions(stageStats.getProcessedInputPositions()),
                        stageStats.getProcessedInputDataSize(),
                        formatDouble(avgPositionsPerTask),
                        formatDouble(sdAmongTasks),
                        formatPositions(stageStats.getOutputPositions()),
                        stageStats.getOutputDataSize()));
    }
    PartitioningScheme partitioningScheme = fragment.getPartitioningScheme();
    builder.append(indentString(1))
            .append(format("Output layout: [%s]\n", Joiner.on(", ").join(partitioningScheme.getOutputLayout())));
    boolean replicateNullsAndAny = partitioningScheme.isReplicateNullsAndAny();
    List<String> arguments = partitioningScheme.getPartitioning().getArguments().stream()
            .map(argument -> {
                if (argument.isConstant()) {
                    NullableValue constant = argument.getConstant();
                    String printableValue = valuePrinter.castToVarchar(constant.getType(), constant.getValue());
                    return constant.getType().getDisplayName() + "(" + printableValue + ")";
                }
                return argument.getColumn().toString();
            })
            .collect(toImmutableList());
    builder.append(indentString(1));
    if (replicateNullsAndAny) {
        builder.append(format("Output partitioning: %s (replicate nulls and any) [%s]%s\n",
                partitioningScheme.getPartitioning().getHandle(),
                Joiner.on(", ").join(arguments),
                formatHash(partitioningScheme.getHashColumn())));
    } else {
        builder.append(format("Output partitioning: %s [%s]%s\n",
                partitioningScheme.getPartitioning().getHandle(),
                Joiner.on(", ").join(arguments),
                formatHash(partitioningScheme.getHashColumn())));
    }
    builder.append(indentString(1))
            .append(format("Stage Execution Strategy: %s\n", fragment.getStageExecutionDescriptor().getStageExecutionStrategy()));
    TypeProvider typeProvider = TypeProvider.copyOf(allFragments.stream()
            .flatMap(f -> f.getSymbols().entrySet().stream())
            .distinct()
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)));
    builder.append(new PlanPrinter(
            fragment.getRoot(),
            typeProvider,
            Optional.of(fragment.getStageExecutionDescriptor()),
            tableInfoSupplier,
            valuePrinter,
            fragment.getStatsAndCosts(),
            planNodeStats,
            metadata)
            .toText(verbose, 1))
            .append("\n");
    return builder.toString();
}
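The per-task figures printed in the fragment header are a plain average and a population standard deviation of each task's processed input positions, exactly as computed above. A self-contained sketch of that arithmetic follows; the task counts are invented.

import java.util.List;

public final class TaskSpreadSketch {
    public static void main(String[] args) {
        // Hypothetical processed-input-position counts for three tasks of one stage.
        List<Long> positionsPerTask = List.of(9_000L, 10_000L, 11_000L);
        double avg = positionsPerTask.stream().mapToLong(Long::longValue).average().orElse(Double.NaN);
        double squaredDifferences = positionsPerTask.stream()
                .mapToDouble(positions -> Math.pow(positions - avg, 2))
                .sum();
        double stdDev = Math.sqrt(squaredDifferences / positionsPerTask.size());
        System.out.printf("per task: avg.: %.1f std.dev.: %.1f%n", avg, stdDev); // avg.: 10000.0 std.dev.: 816.5
    }
}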