Search in sources :

Example 1 with WindowInfo

Use of com.facebook.presto.operator.WindowInfo in project presto by prestodb.

Class PlanNodeStatsSummarizer, method getPlanNodeStats.

/**
 * Reconstructs per-plan-node statistics for one task from the operator summaries of its pipelines.
 * <p>
 * Wall and CPU time are summed over every operator that reports a given plan node. Input figures are
 * counted once per plan node per pipeline (from the first operator reporting that node) and output
 * figures once per plan node per pipeline (from the last operator reporting that node); both are then
 * summed across pipelines.
 *
 * @param taskStats task statistics whose pipelines are scanned
 * @return one entry for each plan node that has recorded input statistics
 */
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
    // This is a best-effort reconstruction. Stats are collected separately from query execution,
    // so some or all of them can be missing or stale; e.g. a LIMIT clause may let a query finish
    // before the leaf stages have reported anything.
    Set<PlanNodeId> seenNodeIds = new HashSet<>();
    Map<PlanNodeId, Long> scheduledMillisByNode = new HashMap<>();
    Map<PlanNodeId, Long> cpuMillisByNode = new HashMap<>();
    Map<PlanNodeId, Long> inputPositionsByNode = new HashMap<>();
    Map<PlanNodeId, Long> inputBytesByNode = new HashMap<>();
    Map<PlanNodeId, Long> rawInputPositionsByNode = new HashMap<>();
    Map<PlanNodeId, Long> rawInputBytesByNode = new HashMap<>();
    Map<PlanNodeId, Long> outputPositionsByNode = new HashMap<>();
    Map<PlanNodeId, Long> outputBytesByNode = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorInputStats>> inputStatsByNode = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> hashCollisionsByNode = new HashMap<>();
    Map<PlanNodeId, WindowOperatorStats> windowStatsByNode = new HashMap<>();

    for (PipelineStats pipeline : taskStats.getPipelines()) {
        // Stats arrive with eventual consistency, so a pipeline may not have any summaries yet
        if (pipeline.getOperatorSummaries().isEmpty()) {
            continue;
        }
        PlanNodeId firstNodeId = pipeline.getOperatorSummaries().iterator().next().getPlanNodeId();
        PlanNodeId lastNodeId = getLast(pipeline.getOperatorSummaries()).getPlanNodeId();

        // Input side: walk the operators front to back
        Set<PlanNodeId> inputCounted = new HashSet<>();
        for (OperatorStats operator : pipeline.getOperatorSummaries()) {
            PlanNodeId nodeId = operator.getPlanNodeId();
            seenNodeIds.add(nodeId);

            // Time is accumulated for every operator, including the ones skipped below
            long wallMillis = operator.getAddInputWall().toMillis()
                    + operator.getGetOutputWall().toMillis()
                    + operator.getFinishWall().toMillis();
            scheduledMillisByNode.merge(nodeId, wallMillis, Long::sum);
            long cpuMillis = operator.getAddInputCpu().toMillis()
                    + operator.getGetOutputCpu().toMillis()
                    + operator.getFinishCpu().toMillis();
            cpuMillisByNode.merge(nodeId, cpuMillis, Long::sum);

            // In a non-input pipeline the leading plan node may get its actual input from another
            // "internal" pipeline (e.g. a hash build before a join), so it is not counted here.
            boolean fedByAnotherPipeline = nodeId.equals(firstNodeId) && !pipeline.isInputPipeline();
            // Set.add returns false when the node was already counted for this pipeline
            if (fedByAnotherPipeline || !inputCounted.add(nodeId)) {
                continue;
            }

            Map<String, OperatorInputStats> inputStatsEntry = ImmutableMap.of(
                    operator.getOperatorType(),
                    new OperatorInputStats(operator.getTotalDrivers(), operator.getInputPositions(), operator.getSumSquaredInputPositions()));
            inputStatsByNode.merge(nodeId, inputStatsEntry, (existing, incoming) -> mergeMaps(existing, incoming, OperatorInputStats::merge));

            Object info = operator.getInfo();
            if (info instanceof HashCollisionsInfo) {
                HashCollisionsInfo collisions = (HashCollisionsInfo) info;
                Map<String, OperatorHashCollisionsStats> collisionsEntry = ImmutableMap.of(
                        operator.getOperatorType(),
                        new OperatorHashCollisionsStats(
                                collisions.getWeightedHashCollisions(),
                                collisions.getWeightedSumSquaredHashCollisions(),
                                collisions.getWeightedExpectedHashCollisions()));
                hashCollisionsByNode.merge(nodeId, collisionsEntry, (existing, incoming) -> mergeMaps(existing, incoming, OperatorHashCollisionsStats::merge));
            }
            // Window function statistics are very low level, thus displayed only in VERBOSE mode
            if (info instanceof WindowInfo) {
                windowStatsByNode.merge(nodeId, WindowOperatorStats.create((WindowInfo) info), (existing, incoming) -> existing.mergeWith(incoming));
            }

            inputPositionsByNode.merge(nodeId, operator.getInputPositions(), Long::sum);
            inputBytesByNode.merge(nodeId, operator.getInputDataSize().toBytes(), Long::sum);
            rawInputPositionsByNode.merge(nodeId, operator.getRawInputPositions(), Long::sum);
            rawInputBytesByNode.merge(nodeId, operator.getRawInputDataSize().toBytes(), Long::sum);
        }

        // Output side: walk the operators back to front
        Set<PlanNodeId> outputCounted = new HashSet<>();
        for (OperatorStats operator : reverse(pipeline.getOperatorSummaries())) {
            PlanNodeId nodeId = operator.getPlanNodeId();
            // In a non-output ("internal") pipeline such as a hash build, the trailing plan node's
            // actual output is produced by another pipeline, so it is not counted here.
            boolean emittedByAnotherPipeline = nodeId.equals(lastNodeId) && !pipeline.isOutputPipeline();
            if (emittedByAnotherPipeline || !outputCounted.add(nodeId)) {
                continue;
            }
            outputPositionsByNode.merge(nodeId, operator.getOutputPositions(), Long::sum);
            outputBytesByNode.merge(nodeId, operator.getOutputDataSize().toBytes(), Long::sum);
        }
    }

    List<PlanNodeStats> result = new ArrayList<>();
    for (PlanNodeId nodeId : seenNodeIds) {
        // Nodes that only ever contributed time (no input recorded) are not reported
        if (!inputPositionsByNode.containsKey(nodeId)) {
            continue;
        }

        Duration scheduledTime = new Duration(scheduledMillisByNode.get(nodeId), MILLISECONDS);
        Duration cpuTime = new Duration(cpuMillisByNode.get(nodeId), MILLISECONDS);
        Long inputPositions = inputPositionsByNode.get(nodeId);
        Long inputBytes = inputBytesByNode.get(nodeId);
        Long rawInputPositions = rawInputPositionsByNode.get(nodeId);
        Long rawInputBytes = rawInputBytesByNode.get(nodeId);
        // Output stats can be absent when every observed pipeline was a non-output one. For example,
        // with SELECT * FROM a JOIN b ON c = d LIMIT 1 the stats may be observed after the build
        // started but before the probe did, leaving scheduled time but no output figures.
        long outputPositions = outputPositionsByNode.getOrDefault(nodeId, 0L);
        Long outputBytes = outputBytesByNode.getOrDefault(nodeId, 0L);
        Map<String, OperatorInputStats> nodeInputStats = inputStatsByNode.get(nodeId);

        if (hashCollisionsByNode.containsKey(nodeId)) {
            result.add(new HashCollisionPlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    rawInputPositions,
                    succinctDataSize(rawInputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    nodeInputStats,
                    hashCollisionsByNode.get(nodeId)));
        } else if (windowStatsByNode.containsKey(nodeId)) {
            result.add(new WindowPlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    rawInputPositions,
                    succinctDataSize(rawInputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    nodeInputStats,
                    windowStatsByNode.get(nodeId)));
        } else {
            result.add(new PlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    rawInputPositions,
                    succinctDataSize(rawInputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    nodeInputStats));
        }
    }
    return result;
}
Also used : PipelineStats(com.facebook.presto.operator.PipelineStats) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Duration(io.airlift.units.Duration) OperatorStats(com.facebook.presto.operator.OperatorStats) HashCollisionsInfo(com.facebook.presto.operator.HashCollisionsInfo) WindowInfo(com.facebook.presto.operator.WindowInfo) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Aggregations

HashCollisionsInfo (com.facebook.presto.operator.HashCollisionsInfo)1 OperatorStats (com.facebook.presto.operator.OperatorStats)1 PipelineStats (com.facebook.presto.operator.PipelineStats)1 WindowInfo (com.facebook.presto.operator.WindowInfo)1 PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Duration (io.airlift.units.Duration)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1