use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.
the class PlanNodeStatsSummarizer method getPlanNodeStats.
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats)
{
    // Best effort to reconstruct the plan nodes from operators.
    // Because stats are collected separately from query execution,
    // it's possible that some or all of them are missing or out of date.
    // For example, a LIMIT clause can cause a query to finish before stats
    // are collected from the leaf stages.
    Set<PlanNodeId> planNodeIds = new HashSet<>();
    Map<PlanNodeId, Long> planNodeInputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeInputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputPositions = new HashMap<>();
    Map<PlanNodeId, Long> planNodeOutputBytes = new HashMap<>();
    Map<PlanNodeId, Long> planNodeScheduledMillis = new HashMap<>();
    Map<PlanNodeId, Long> planNodeCpuMillis = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorInputStats>> operatorInputStats = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> operatorHashCollisionsStats = new HashMap<>();
    Map<PlanNodeId, WindowOperatorStats> windowNodeStats = new HashMap<>();

    for (PipelineStats pipelineStats : taskStats.getPipelines()) {
        // Because stats are collected in an eventually consistent manner, these could be empty
        if (pipelineStats.getOperatorSummaries().isEmpty()) {
            continue;
        }

        Set<PlanNodeId> processedNodes = new HashSet<>();
        PlanNodeId inputPlanNode = pipelineStats.getOperatorSummaries().iterator().next().getPlanNodeId();
        PlanNodeId outputPlanNode = getLast(pipelineStats.getOperatorSummaries()).getPlanNodeId();

        // Gather input statistics
        for (OperatorStats operatorStats : pipelineStats.getOperatorSummaries()) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();
            planNodeIds.add(planNodeId);

            long scheduledMillis = operatorStats.getAddInputWall().toMillis() + operatorStats.getGetOutputWall().toMillis() + operatorStats.getFinishWall().toMillis();
            planNodeScheduledMillis.merge(planNodeId, scheduledMillis, Long::sum);

            long cpuMillis = operatorStats.getAddInputCpu().toMillis() + operatorStats.getGetOutputCpu().toMillis() + operatorStats.getFinishCpu().toMillis();
            planNodeCpuMillis.merge(planNodeId, cpuMillis, Long::sum);

            // A pipeline like a hash build before a join might link to other "internal" pipelines which provide the actual input for this plan node
            if (operatorStats.getPlanNodeId().equals(inputPlanNode) && !pipelineStats.isInputPipeline()) {
                continue;
            }
            if (processedNodes.contains(planNodeId)) {
                continue;
            }

            operatorInputStats.merge(planNodeId,
                    ImmutableMap.of(
                            operatorStats.getOperatorType(),
                            new OperatorInputStats(
                                    operatorStats.getTotalDrivers(),
                                    operatorStats.getInputPositions(),
                                    operatorStats.getSumSquaredInputPositions())),
                    (map1, map2) -> mergeMaps(map1, map2, OperatorInputStats::merge));

            if (operatorStats.getInfo() instanceof HashCollisionsInfo) {
                HashCollisionsInfo hashCollisionsInfo = (HashCollisionsInfo) operatorStats.getInfo();
                operatorHashCollisionsStats.merge(planNodeId,
                        ImmutableMap.of(
                                operatorStats.getOperatorType(),
                                new OperatorHashCollisionsStats(
                                        hashCollisionsInfo.getWeightedHashCollisions(),
                                        hashCollisionsInfo.getWeightedSumSquaredHashCollisions(),
                                        hashCollisionsInfo.getWeightedExpectedHashCollisions())),
                        (map1, map2) -> mergeMaps(map1, map2, OperatorHashCollisionsStats::merge));
            }

            // The only statistics we have for Window Functions are very low level, thus displayed only in VERBOSE mode
            if (operatorStats.getInfo() instanceof WindowInfo) {
                WindowInfo windowInfo = (WindowInfo) operatorStats.getInfo();
                windowNodeStats.merge(planNodeId, WindowOperatorStats.create(windowInfo), (left, right) -> left.mergeWith(right));
            }

            planNodeInputPositions.merge(planNodeId, operatorStats.getInputPositions(), Long::sum);
            planNodeInputBytes.merge(planNodeId, operatorStats.getInputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }

        // Gather output statistics
        processedNodes.clear();
        for (OperatorStats operatorStats : reverse(pipelineStats.getOperatorSummaries())) {
            PlanNodeId planNodeId = operatorStats.getPlanNodeId();

            // An "internal" pipeline like a hash build links to another pipeline which is the actual output for this plan node
            if (operatorStats.getPlanNodeId().equals(outputPlanNode) && !pipelineStats.isOutputPipeline()) {
                continue;
            }
            if (processedNodes.contains(planNodeId)) {
                continue;
            }

            planNodeOutputPositions.merge(planNodeId, operatorStats.getOutputPositions(), Long::sum);
            planNodeOutputBytes.merge(planNodeId, operatorStats.getOutputDataSize().toBytes(), Long::sum);
            processedNodes.add(planNodeId);
        }
    }

    List<PlanNodeStats> stats = new ArrayList<>();
    for (PlanNodeId planNodeId : planNodeIds) {
        if (!planNodeInputPositions.containsKey(planNodeId)) {
            continue;
        }

        PlanNodeStats nodeStats;

        // It's possible there will be no output stats because all the pipelines that we observed were non-output.
        // For example, in a query like SELECT * FROM a JOIN b ON c = d LIMIT 1,
        // it's possible to observe stats after the build starts but before the probe does,
        // and therefore have only scheduled time, but no output stats.
        long outputPositions = planNodeOutputPositions.getOrDefault(planNodeId, 0L);

        if (operatorHashCollisionsStats.containsKey(planNodeId)) {
            nodeStats = new HashCollisionPlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId),
                    operatorHashCollisionsStats.get(planNodeId));
        }
        else if (windowNodeStats.containsKey(planNodeId)) {
            nodeStats = new WindowPlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId),
                    windowNodeStats.get(planNodeId));
        }
        else {
            nodeStats = new PlanNodeStats(
                    planNodeId,
                    new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS),
                    new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS),
                    planNodeInputPositions.get(planNodeId),
                    succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE),
                    outputPositions,
                    succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE),
                    operatorInputStats.get(planNodeId));
        }
        stats.add(nodeStats);
    }
    return stats;
}
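
The mergeMaps helper called above is not part of this excerpt. A minimal sketch of what such a helper could look like, assuming it only needs to combine two maps key by key with a caller-supplied merge function (the actual helper in PlanNodeStatsSummarizer may differ):

import java.util.HashMap;
import java.util.Map;
import java.util.function.BinaryOperator;

// Sketch only: combines two maps by key; values present in both maps are merged with the
// supplied function, matching how it is invoked above, e.g.
// mergeMaps(map1, map2, OperatorInputStats::merge).
private static <K, V> Map<K, V> mergeMaps(Map<K, V> map1, Map<K, V> map2, BinaryOperator<V> merger)
{
    Map<K, V> result = new HashMap<>(map1);
    map2.forEach((key, value) -> result.merge(key, value, merger));
    return result;
}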
use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method testJoinWithEmptyBuildSide.
@Test
public void testJoinWithEmptyBuildSide()
{
    Session session = Session.builder(getSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, FeaturesConfig.JoinDistributionType.BROADCAST.name())
            .build();
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId(
            session,
            "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.totalprice = 123.4567");
    assertEquals(result.getResult().getRowCount(), 0);

    OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "tpch:lineitem");
    // Probe side is not scanned at all, due to dynamic filtering:
    assertEquals(probeStats.getInputPositions(), 0L);
}
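
Both assertions hinge on the empty build side: orders.totalprice = 123.4567 matches no orders row, so, as the comment notes, dynamic filtering keeps the probe side from being scanned at all. The run-then-inspect pattern could be factored into a small helper; a hypothetical sketch (not present in the project) that reuses only methods already shown in this test class:

// Hypothetical helper: runs a query and returns the input positions recorded for the
// probe-side ScanFilterAndProject operator. Assumes getQueryRunner() and
// searchScanFilterAndProjectOperatorStats() from the surrounding test class.
private long probeInputPositions(Session session, String sql, String probeTable)
{
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId(session, sql);
    OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), probeTable);
    return probeStats.getInputPositions();
}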
use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method testJoinWithSelectiveBuildSide.
@Test
public void testJoinWithSelectiveBuildSide()
{
    Session session = Session.builder(getSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, FeaturesConfig.JoinDistributionType.BROADCAST.name())
            .build();
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId(
            session,
            "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.custkey = 1");
    assertGreaterThan(result.getResult().getRowCount(), 0);

    OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "tpch:lineitem");
    // Probe side may be partially scanned, depending on the drivers' scheduling:
    assertLessThanOrEqual(probeStats.getInputPositions(), countRows("lineitem"));
}
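
The countRows helper used in the last assertion is not shown on this page. A plausible sketch, assuming it simply counts the rows of the named TPC-H table through the same query runner (the project's actual helper may differ):

// Sketch of a countRows helper: counts rows in the given table via the query runner.
// Assumes tableName is trusted test input, since it is concatenated into SQL directly.
private long countRows(String tableName)
{
    MaterializedResult result = getQueryRunner().execute("SELECT count(*) FROM " + tableName);
    return (Long) result.getOnlyValue();
}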
use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method searchScanFilterAndProjectOperatorStats.
private OperatorStats searchScanFilterAndProjectOperatorStats(QueryId queryId, String tableName)
{
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    Plan plan = runner.getQueryPlan(queryId);
    PlanNodeId nodeId = PlanNodeSearcher.searchFrom(plan.getRoot())
            .where(node -> {
                if (!(node instanceof ProjectNode)) {
                    return false;
                }
                ProjectNode projectNode = (ProjectNode) node;
                FilterNode filterNode = (FilterNode) projectNode.getSource();
                TableScanNode tableScanNode = (TableScanNode) filterNode.getSource();
                return tableName.equals(tableScanNode.getTable().getConnectorHandle().toString());
            })
            .findOnlyElement()
            .getId();
    return runner.getCoordinator().getQueryManager().getFullQueryInfo(queryId)
            .getQueryStats()
            .getOperatorSummaries()
            .stream()
            .filter(summary -> nodeId.equals(summary.getPlanNodeId()))
            .collect(MoreCollectors.onlyElement());
}
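
The predicate above casts projectNode.getSource() and filterNode.getSource() without checking their types, so it assumes every ProjectNode it visits sits on a Filter-over-TableScan chain, which evidently holds for the plans these tests produce. A more defensive variant of the same check, written as a standalone sketch rather than the project's code:

// Sketch: matches only Project -> Filter -> TableScan chains over the given table and
// quietly skips ProjectNodes whose children have a different shape.
private static boolean isScanFilterAndProjectOver(PlanNode node, String tableName)
{
    if (!(node instanceof ProjectNode)) {
        return false;
    }
    PlanNode source = ((ProjectNode) node).getSource();
    if (!(source instanceof FilterNode)) {
        return false;
    }
    PlanNode scan = ((FilterNode) source).getSource();
    if (!(scan instanceof TableScanNode)) {
        return false;
    }
    return tableName.equals(((TableScanNode) scan).getTable().getConnectorHandle().toString());
}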
use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.
the class QueryMonitor method createQueryStatistics.
private QueryStatistics createQueryStatistics(QueryInfo queryInfo)
{
    ImmutableList.Builder<String> operatorSummaries = ImmutableList.builder();
    for (OperatorStats summary : queryInfo.getQueryStats().getOperatorSummaries()) {
        operatorSummaries.add(operatorStatsCodec.toJson(summary));
    }

    Optional<StatsAndCosts> planNodeStatsAndCosts = queryInfo.getOutputStage().map(StatsAndCosts::create);
    Optional<String> serializedPlanNodeStatsAndCosts = planNodeStatsAndCosts.map(statsAndCostsCodec::toJson);

    QueryStats queryStats = queryInfo.getQueryStats();
    return new QueryStatistics(
            ofMillis(queryStats.getTotalCpuTime().toMillis()),
            ofMillis(queryStats.getTotalScheduledTime().toMillis()),
            ofMillis(queryStats.getQueuedTime().toMillis()),
            Optional.of(ofMillis(queryStats.getResourceWaitingTime().toMillis())),
            Optional.of(ofMillis(queryStats.getAnalysisTime().toMillis())),
            Optional.of(ofMillis(queryStats.getDistributedPlanningTime().toMillis())),
            queryStats.getPeakUserMemoryReservation().toBytes(), queryStats.getPeakTotalMemoryReservation().toBytes(),
            queryStats.getPeakTaskUserMemory().toBytes(), queryStats.getPeakTaskTotalMemory().toBytes(),
            queryStats.getPhysicalInputDataSize().toBytes(), queryStats.getPhysicalInputPositions(),
            queryStats.getInternalNetworkInputDataSize().toBytes(), queryStats.getInternalNetworkInputPositions(),
            queryStats.getRawInputDataSize().toBytes(), queryStats.getRawInputPositions(),
            queryStats.getOutputDataSize().toBytes(), queryStats.getOutputPositions(),
            queryStats.getLogicalWrittenDataSize().toBytes(), queryStats.getWrittenPositions(),
            queryStats.getCumulativeUserMemory(),
            queryStats.getStageGcStatistics(),
            queryStats.getCompletedDrivers(),
            queryInfo.isCompleteInfo(),
            getCpuDistributions(queryInfo),
            operatorSummaries.build(),
            serializedPlanNodeStatsAndCosts);
}
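
The operatorStatsCodec and statsAndCostsCodec used above are fields of QueryMonitor, supplied through its constructor. For serializing OperatorStats outside that class, an Airlift JsonCodec is the natural tool; a sketch with illustrative class and method names, assuming the default codec configuration is sufficient for these stats classes:

import io.airlift.json.JsonCodec;
import io.prestosql.operator.OperatorStats;

public final class OperatorStatsSerialization
{
    private OperatorStatsSerialization() {}

    // Sketch only: QueryMonitor receives its codecs via dependency injection, so this
    // standalone codec illustrates the serialization step rather than the production wiring.
    public static String toJson(OperatorStats operatorStats)
    {
        JsonCodec<OperatorStats> codec = JsonCodec.jsonCodec(OperatorStats.class);
        return codec.toJson(operatorStats);
    }
}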