Example usage of com.facebook.presto.operator.HashCollisionsInfo in the project presto by prestodb.
The example is the method getPlanNodeStats of the class PlanPrinter.
/**
 * Aggregates the per-operator statistics of a task into per-plan-node statistics.
 *
 * <p>This is a best-effort reconstruction of plan nodes from operators. Because stats are
 * collected separately from query execution, some or all of them may be missing or out of
 * date. For example, a LIMIT clause can cause a query to finish before stats are collected
 * from the leaf stages.
 *
 * @param taskStats the task statistics whose pipelines and operator summaries are aggregated
 * @return one {@code PlanNodeStats} per plan node for which input statistics were observed;
 *         plan nodes that only have wall time recorded are omitted
 */
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
    Map<PlanNodeId, Long> planNodeInputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeInputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeWallMillis = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorInputStats>> operatorInputStats = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> operatorHashCollisionsStats = new HashMap<>();

    for (PipelineStats pipelineStats : taskStats.getPipelines()) {
        // Stats are collected with eventual consistency, so the summaries may be empty
        if (pipelineStats.getOperatorSummaries().isEmpty()) {
            continue;
        }

        Set<PlanNodeId> processedNodes = new HashSet<>();
        PlanNodeId inputPlanNode = pipelineStats.getOperatorSummaries().iterator().next().getPlanNodeId();
        PlanNodeId outputPlanNode = getLast(pipelineStats.getOperatorSummaries()).getPlanNodeId();

        // Gather input statistics
        for (OperatorStats operatorStats : pipelineStats.getOperatorSummaries()) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();

            // Wall time is accumulated for every operator, including the ones skipped below
            long wall = operatorStats.getAddInputWall().toMillis()
                    + operatorStats.getGetOutputWall().toMillis()
                    + operatorStats.getFinishWall().toMillis();
            planNodeWallMillis.merge(planNodeId, wall, Long::sum);

            // A pipeline like hash build before join might link to other "internal" pipelines
            // which provide the actual input for this plan node
            if (planNodeId.equals(inputPlanNode) && !pipelineStats.isInputPipeline()) {
                continue;
            }
            // Only the first operator of each plan node in this pipeline contributes input stats
            if (processedNodes.contains(planNodeId)) {
                continue;
            }

            operatorInputStats.merge(
                    planNodeId,
                    ImmutableMap.of(
                            operatorStats.getOperatorType(),
                            new OperatorInputStats(
                                    operatorStats.getTotalDrivers(),
                                    operatorStats.getInputPositions(),
                                    operatorStats.getSumSquaredInputPositions())),
                    PlanPrinter::mergeOperatorInputStatsMaps);

            if (operatorStats.getInfo() instanceof HashCollisionsInfo) {
                HashCollisionsInfo hashCollisionsInfo = (HashCollisionsInfo) operatorStats.getInfo();
                operatorHashCollisionsStats.merge(
                        planNodeId,
                        ImmutableMap.of(
                                operatorStats.getOperatorType(),
                                new OperatorHashCollisionsStats(
                                        hashCollisionsInfo.getWeightedHashCollisions(),
                                        hashCollisionsInfo.getWeightedSumSquaredHashCollisions(),
                                        hashCollisionsInfo.getWeightedExpectedHashCollisions())),
                        PlanPrinter::mergeOperatorHashCollisionsStatsMaps);
            }

            planNodeInputPositions.merge(planNodeId, operatorStats.getInputPositions(), Long::sum);
            planNodeInputBytes.merge(planNodeId, operatorStats.getInputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }

        // Gather output statistics, walking the operators from last to first
        processedNodes.clear();
        for (OperatorStats operatorStats : reverse(pipelineStats.getOperatorSummaries())) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();

            // An "internal" pipeline like a hash build links to another pipeline
            // which is the actual output for this plan node
            if (planNodeId.equals(outputPlanNode) && !pipelineStats.isOutputPipeline()) {
                continue;
            }
            // Only the last operator of each plan node in this pipeline contributes output stats
            if (processedNodes.contains(planNodeId)) {
                continue;
            }

            planNodeOutputPositions.merge(planNodeId, operatorStats.getOutputPositions(), Long::sum);
            planNodeOutputBytes.merge(planNodeId, operatorStats.getOutputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }
    }

    List<PlanNodeStats> stats = new ArrayList<>();
    for (Map.Entry<PlanNodeId, Long> entry : planNodeWallMillis.entrySet()) {
        PlanNodeId planNodeId = entry.getKey();

        // Wall time is recorded for every operator, but input stats are skipped for the leading
        // plan node of non-input pipelines. A node seen only in that position has no input
        // entries, so the lookups below would yield null; omit such nodes instead.
        if (!planNodeInputPositions.containsKey(planNodeId)) {
            continue;
        }

        stats.add(new PlanNodeStats(
                planNodeId,
                new Duration(entry.getValue(), MILLISECONDS),
                planNodeInputPositions.get(planNodeId),
                succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                // Output stats may be absent when every observed pipeline for this node was a
                // non-output pipeline, leaving only wall time; default them to zero
                planNodeOutputPositions.getOrDefault(planNodeId, 0L),
                succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                operatorInputStats.get(planNodeId),
                // Only some operators emit hash collisions statistics
                operatorHashCollisionsStats.getOrDefault(planNodeId, emptyMap())));
    }
    return stats;
}
Aggregations