Search in sources:

Example 1 with OperatorStats

use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.

In the class PlanNodeStatsSummarizer, method getPlanNodeStats.

/**
 * Rolls the per-operator statistics of a task up into one {@link PlanNodeStats} entry per plan node.
 *
 * <p>Scheduled and CPU time are summed over every operator of a plan node. Input figures are taken
 * from the first operator of each plan node within a pipeline, output figures from the last one.
 * Plan nodes for which no input statistics were recorded are left out of the result.
 *
 * @param taskStats statistics of the task whose pipelines are summarized
 * @return one entry per plan node that has input statistics
 */
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
    // Reconstructing plan nodes from operators is best effort only: stats are collected
    // separately from query execution, so some or all of them may be missing or stale.
    // A LIMIT clause, for instance, can finish the query before the leaf stages report.
    Set<PlanNodeId> seenNodeIds = new HashSet<>();
    Map<PlanNodeId, Long> inputPositionsByNode = new HashMap<>();
    Map<PlanNodeId, Long> inputBytesByNode = new HashMap<>();
    Map<PlanNodeId, Long> outputPositionsByNode = new HashMap<>();
    Map<PlanNodeId, Long> outputBytesByNode = new HashMap<>();
    Map<PlanNodeId, Long> scheduledMillisByNode = new HashMap<>();
    Map<PlanNodeId, Long> cpuMillisByNode = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorInputStats>> inputStatsByNode = new HashMap<>();
    Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> hashCollisionsByNode = new HashMap<>();
    Map<PlanNodeId, WindowOperatorStats> windowStatsByNode = new HashMap<>();

    for (PipelineStats pipeline : taskStats.getPipelines()) {
        List<OperatorStats> summaries = pipeline.getOperatorSummaries();
        // Stats are collected with eventual consistency, so a pipeline may have none yet
        if (summaries.isEmpty()) {
            continue;
        }
        PlanNodeId firstNodeId = summaries.iterator().next().getPlanNodeId();
        PlanNodeId lastNodeId = getLast(summaries).getPlanNodeId();

        // Input side: walk the operators front to back
        Set<PlanNodeId> inputSeen = new HashSet<>();
        for (OperatorStats operator : summaries) {
            PlanNodeId nodeId = operator.getPlanNodeId();
            seenNodeIds.add(nodeId);

            long wallMillis = operator.getAddInputWall().toMillis()
                    + operator.getGetOutputWall().toMillis()
                    + operator.getFinishWall().toMillis();
            scheduledMillisByNode.merge(nodeId, wallMillis, Long::sum);

            long processorMillis = operator.getAddInputCpu().toMillis()
                    + operator.getGetOutputCpu().toMillis()
                    + operator.getFinishCpu().toMillis();
            cpuMillisByNode.merge(nodeId, processorMillis, Long::sum);

            // A pipeline such as the hash build before a join may be fed by other "internal"
            // pipelines, which are the ones providing the real input for this plan node
            if (nodeId.equals(firstNodeId) && !pipeline.isInputPipeline()) {
                continue;
            }
            // Only the first operator of a plan node contributes input statistics
            if (!inputSeen.add(nodeId)) {
                continue;
            }

            String operatorType = operator.getOperatorType();
            OperatorInputStats inputStats = new OperatorInputStats(
                    operator.getTotalDrivers(),
                    operator.getInputPositions(),
                    operator.getSumSquaredInputPositions());
            inputStatsByNode.merge(
                    nodeId,
                    ImmutableMap.of(operatorType, inputStats),
                    (map1, map2) -> mergeMaps(map1, map2, OperatorInputStats::merge));

            if (operator.getInfo() instanceof HashCollisionsInfo) {
                HashCollisionsInfo collisions = (HashCollisionsInfo) operator.getInfo();
                OperatorHashCollisionsStats collisionStats = new OperatorHashCollisionsStats(
                        collisions.getWeightedHashCollisions(),
                        collisions.getWeightedSumSquaredHashCollisions(),
                        collisions.getWeightedExpectedHashCollisions());
                hashCollisionsByNode.merge(
                        nodeId,
                        ImmutableMap.of(operator.getOperatorType(), collisionStats),
                        (map1, map2) -> mergeMaps(map1, map2, OperatorHashCollisionsStats::merge));
            }

            // Window function statistics are very low level, so they are only shown in VERBOSE mode
            if (operator.getInfo() instanceof WindowInfo) {
                WindowInfo window = (WindowInfo) operator.getInfo();
                windowStatsByNode.merge(nodeId, WindowOperatorStats.create(window), (left, right) -> left.mergeWith(right));
            }

            inputPositionsByNode.merge(nodeId, operator.getInputPositions(), Long::sum);
            inputBytesByNode.merge(nodeId, operator.getInputDataSize().toBytes(), Long::sum);
        }

        // Output side: walk the operators back to front
        Set<PlanNodeId> outputSeen = new HashSet<>();
        for (OperatorStats operator : reverse(summaries)) {
            PlanNodeId nodeId = operator.getPlanNodeId();
            // An "internal" pipeline such as a hash build hands over to another pipeline,
            // which is the one producing the real output for this plan node
            if (nodeId.equals(lastNodeId) && !pipeline.isOutputPipeline()) {
                continue;
            }
            // Only the last operator of a plan node contributes output statistics
            if (!outputSeen.add(nodeId)) {
                continue;
            }
            outputPositionsByNode.merge(nodeId, operator.getOutputPositions(), Long::sum);
            outputBytesByNode.merge(nodeId, operator.getOutputDataSize().toBytes(), Long::sum);
        }
    }

    List<PlanNodeStats> result = new ArrayList<>();
    for (PlanNodeId nodeId : seenNodeIds) {
        if (!inputPositionsByNode.containsKey(nodeId)) {
            continue;
        }
        Duration scheduledTime = new Duration(scheduledMillisByNode.get(nodeId), MILLISECONDS);
        Duration cpuTime = new Duration(cpuMillisByNode.get(nodeId), MILLISECONDS);
        long inputPositions = inputPositionsByNode.get(nodeId);
        long inputBytes = inputBytesByNode.get(nodeId);
        // Output stats can be absent when every observed pipeline was a non-output one.
        // With SELECT * FROM a JOIN b ON c = d LIMIT 1, stats may be sampled after the build
        // has started but before the probe has, leaving scheduled time but no output stats.
        long outputPositions = outputPositionsByNode.getOrDefault(nodeId, 0L);
        long outputBytes = outputBytesByNode.getOrDefault(nodeId, 0L);
        Map<String, OperatorInputStats> inputStats = inputStatsByNode.get(nodeId);

        if (hashCollisionsByNode.containsKey(nodeId)) {
            result.add(new HashCollisionPlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    inputStats,
                    hashCollisionsByNode.get(nodeId)));
        } else if (windowStatsByNode.containsKey(nodeId)) {
            result.add(new WindowPlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    inputStats,
                    windowStatsByNode.get(nodeId)));
        } else {
            result.add(new PlanNodeStats(
                    nodeId,
                    scheduledTime,
                    cpuTime,
                    inputPositions,
                    succinctDataSize(inputBytes, BYTE),
                    outputPositions,
                    succinctDataSize(outputBytes, BYTE),
                    inputStats));
        }
    }
    return result;
}
Also used : PipelineStats(io.prestosql.operator.PipelineStats) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Duration(io.airlift.units.Duration) OperatorStats(io.prestosql.operator.OperatorStats) HashCollisionsInfo(io.prestosql.operator.HashCollisionsInfo) WindowInfo(io.prestosql.operator.WindowInfo) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 2 with OperatorStats

use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.

In the class TestHiveDistributedJoinQueriesWithDynamicFiltering, method testJoinWithEmptyBuildSide.

/**
 * Runs a broadcast join whose build side matches no rows and checks that the
 * probe-side scan of lineitem reports zero input positions.
 */
@Test
public void testJoinWithEmptyBuildSide() {
    // Pin the join distribution type to BROADCAST for this query
    String distributionType = FeaturesConfig.JoinDistributionType.BROADCAST.name();
    Session broadcastSession = Session.builder(getSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, distributionType)
            .build();
    DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner();

    String sql = "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.totalprice = 123.4567";
    ResultWithQueryId<MaterializedResult> outcome = queryRunner.executeWithQueryId(broadcastSession, sql);
    assertEquals(outcome.getResult().getRowCount(), 0);

    // Thanks to dynamic filtering the probe side is never scanned
    OperatorStats lineitemScanStats = searchScanFilterAndProjectOperatorStats(outcome.getQueryId(), "tpch:lineitem");
    assertEquals(lineitemScanStats.getInputPositions(), 0L);
}
Also used : DistributedQueryRunner(io.prestosql.tests.DistributedQueryRunner) OperatorStats(io.prestosql.operator.OperatorStats) MaterializedResult(io.prestosql.testing.MaterializedResult) Session(io.prestosql.Session) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) Test(org.testng.annotations.Test)

Example 3 with OperatorStats

use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.

In the class TestHiveDistributedJoinQueriesWithDynamicFiltering, method testJoinWithSelectiveBuildSide.

/**
 * Runs a broadcast join with a selective build-side predicate and checks that the query
 * returns rows while the probe-side scan of lineitem reads at most the full table.
 */
@Test
public void testJoinWithSelectiveBuildSide() {
    // Pin the join distribution type to BROADCAST for this query
    String distributionType = FeaturesConfig.JoinDistributionType.BROADCAST.name();
    Session broadcastSession = Session.builder(getSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, distributionType)
            .build();
    DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner();

    String sql = "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.custkey = 1";
    ResultWithQueryId<MaterializedResult> outcome = queryRunner.executeWithQueryId(broadcastSession, sql);
    assertGreaterThan(outcome.getResult().getRowCount(), 0);

    // How much of the probe side gets scanned depends on how the drivers are scheduled
    OperatorStats lineitemScanStats = searchScanFilterAndProjectOperatorStats(outcome.getQueryId(), "tpch:lineitem");
    assertLessThanOrEqual(lineitemScanStats.getInputPositions(), countRows("lineitem"));
}
Also used : DistributedQueryRunner(io.prestosql.tests.DistributedQueryRunner) OperatorStats(io.prestosql.operator.OperatorStats) MaterializedResult(io.prestosql.testing.MaterializedResult) Session(io.prestosql.Session) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) Test(org.testng.annotations.Test)

Example 4 with OperatorStats

use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.

In the class TestHiveDistributedJoinQueriesWithDynamicFiltering, method searchScanFilterAndProjectOperatorStats.

/**
 * Finds the operator summary for the Project-over-Filter-over-TableScan chain that reads the given table.
 *
 * <p>The query plan is searched for the single {@link ProjectNode} whose source is a
 * {@link FilterNode} sitting directly on a {@link TableScanNode} for {@code tableName}.
 * The operator summary carrying that project node's id is then returned.
 *
 * @param queryId id of an already executed query
 * @param tableName expected string form of the table scan's connector handle
 * @return the only operator summary attached to the matching project node
 */
private OperatorStats searchScanFilterAndProjectOperatorStats(QueryId queryId, String tableName) {
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    Plan plan = runner.getQueryPlan(queryId);
    PlanNodeId nodeId = PlanNodeSearcher.searchFrom(plan.getRoot()).where(node -> {
        // The predicate is evaluated against arbitrary plan nodes, so each level of the expected
        // Project -> Filter -> TableScan shape is checked before casting; a node of any other
        // shape is rejected instead of failing with a ClassCastException.
        if (!(node instanceof ProjectNode)) {
            return false;
        }
        ProjectNode projectNode = (ProjectNode) node;
        if (!(projectNode.getSource() instanceof FilterNode)) {
            return false;
        }
        FilterNode filterNode = (FilterNode) projectNode.getSource();
        if (!(filterNode.getSource() instanceof TableScanNode)) {
            return false;
        }
        TableScanNode tableScanNode = (TableScanNode) filterNode.getSource();
        return tableName.equals(tableScanNode.getTable().getConnectorHandle().toString());
    }).findOnlyElement().getId();
    return runner.getCoordinator()
            .getQueryManager()
            .getFullQueryInfo(queryId)
            .getQueryStats()
            .getOperatorSummaries()
            .stream()
            .filter(summary -> nodeId.equals(summary.getPlanNodeId()))
            .collect(MoreCollectors.onlyElement());
}
Also used : PlanNodeId(io.prestosql.spi.plan.PlanNodeId) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Plan(io.prestosql.sql.planner.Plan) Test(org.testng.annotations.Test) MaterializedResult(io.prestosql.testing.MaterializedResult) TypeSignature.parseTypeSignature(io.prestosql.spi.type.TypeSignature.parseTypeSignature) FilterNode(io.prestosql.spi.plan.FilterNode) ENABLE_DYNAMIC_FILTERING(io.prestosql.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING) DistributedQueryRunner(io.prestosql.tests.DistributedQueryRunner) Map(java.util.Map) Assert.assertFalse(org.testng.Assert.assertFalse) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) HiveTestUtils.createTestHdfsEnvironment(io.prestosql.plugin.hive.HiveTestUtils.createTestHdfsEnvironment) MoreCollectors(com.google.common.collect.MoreCollectors) TableScanNode(io.prestosql.spi.plan.TableScanNode) ProjectNode(io.prestosql.spi.plan.ProjectNode) HiveQueryRunner.createQueryRunnerWithStateStore(io.prestosql.plugin.hive.HiveQueryRunner.createQueryRunnerWithStateStore) TpchTable.getTables(io.airlift.tpch.TpchTable.getTables) AbstractTestQueryFramework(io.prestosql.tests.AbstractTestQueryFramework) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Optional(java.util.Optional) OperatorStats(io.prestosql.operator.OperatorStats) JOIN_DISTRIBUTION_TYPE(io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) StandardTypes(io.prestosql.spi.type.StandardTypes) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FixedPageSource(io.prestosql.spi.connector.FixedPageSource) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) 
Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) Supplier(java.util.function.Supplier) BloomFilter(io.prestosql.spi.util.BloomFilter) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) PARTITION_KEY(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) Session(io.prestosql.Session) DynamicFilterSupplier(io.prestosql.spi.dynamicfilter.DynamicFilterSupplier) QueryId(io.prestosql.spi.QueryId) HiveTestUtils.getNoOpIndexCache(io.prestosql.plugin.hive.HiveTestUtils.getNoOpIndexCache) Properties(java.util.Properties) DYNAMIC_FILTERING_WAIT_TIME(io.prestosql.SystemSessionProperties.DYNAMIC_FILTERING_WAIT_TIME) ResultWithQueryId(io.prestosql.tests.ResultWithQueryId) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) HiveTestUtils.getDefaultHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) HiveTestUtils.getDefaultHiveDataStreamFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) PlanNodeSearcher(io.prestosql.sql.planner.optimizations.PlanNodeSearcher) FeaturesConfig(io.prestosql.sql.analyzer.FeaturesConfig) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) DynamicFilterFactory(io.prestosql.spi.dynamicfilter.DynamicFilterFactory) DistributedQueryRunner(io.prestosql.tests.DistributedQueryRunner) TableScanNode(io.prestosql.spi.plan.TableScanNode) FilterNode(io.prestosql.spi.plan.FilterNode) ProjectNode(io.prestosql.spi.plan.ProjectNode) Plan(io.prestosql.sql.planner.Plan)

Example 5 with OperatorStats

use of io.prestosql.operator.OperatorStats in project hetu-core by openlookeng.

In the class QueryMonitor, method createQueryStatistics.

/**
 * Builds the event-listener {@link QueryStatistics} for a query from its {@link QueryInfo}.
 *
 * <p>Operator summaries and the plan node stats-and-costs of the output stage (when present)
 * are serialized to JSON; durations are converted through their millisecond values and
 * data sizes are reported in bytes.
 *
 * @param queryInfo the query to describe
 * @return the statistics object built from {@code queryInfo}
 */
private QueryStatistics createQueryStatistics(QueryInfo queryInfo) {
    QueryStats queryStats = queryInfo.getQueryStats();

    // JSON-encode every operator summary
    ImmutableList.Builder<String> operatorSummaryJson = ImmutableList.builder();
    queryStats.getOperatorSummaries().forEach(summary -> operatorSummaryJson.add(operatorStatsCodec.toJson(summary)));

    // Stats and costs are derived from the output stage, so they are absent when it is
    Optional<String> statsAndCostsJson = queryInfo.getOutputStage()
            .map(StatsAndCosts::create)
            .map(statsAndCostsCodec::toJson);

    return new QueryStatistics(
            ofMillis(queryStats.getTotalCpuTime().toMillis()),
            ofMillis(queryStats.getTotalScheduledTime().toMillis()),
            ofMillis(queryStats.getQueuedTime().toMillis()),
            Optional.of(ofMillis(queryStats.getResourceWaitingTime().toMillis())),
            Optional.of(ofMillis(queryStats.getAnalysisTime().toMillis())),
            Optional.of(ofMillis(queryStats.getDistributedPlanningTime().toMillis())),
            queryStats.getPeakUserMemoryReservation().toBytes(),
            queryStats.getPeakTotalMemoryReservation().toBytes(),
            queryStats.getPeakTaskUserMemory().toBytes(),
            queryStats.getPeakTaskTotalMemory().toBytes(),
            queryStats.getPhysicalInputDataSize().toBytes(),
            queryStats.getPhysicalInputPositions(),
            queryStats.getInternalNetworkInputDataSize().toBytes(),
            queryStats.getInternalNetworkInputPositions(),
            queryStats.getRawInputDataSize().toBytes(),
            queryStats.getRawInputPositions(),
            queryStats.getOutputDataSize().toBytes(),
            queryStats.getOutputPositions(),
            queryStats.getLogicalWrittenDataSize().toBytes(),
            queryStats.getWrittenPositions(),
            queryStats.getCumulativeUserMemory(),
            queryStats.getStageGcStatistics(),
            queryStats.getCompletedDrivers(),
            queryInfo.isCompleteInfo(),
            getCpuDistributions(queryInfo),
            operatorSummaryJson.build(),
            statsAndCostsJson);
}
Also used : QueryStats(io.prestosql.execution.QueryStats) QueryStatistics(io.prestosql.spi.eventlistener.QueryStatistics) ImmutableList(com.google.common.collect.ImmutableList) StatsAndCosts(io.prestosql.cost.StatsAndCosts) OperatorStats(io.prestosql.operator.OperatorStats)

Aggregations

OperatorStats (io.prestosql.operator.OperatorStats)10 Session (io.prestosql.Session)6 MaterializedResult (io.prestosql.testing.MaterializedResult)6 TestingConnectorSession (io.prestosql.testing.TestingConnectorSession)6 DistributedQueryRunner (io.prestosql.tests.DistributedQueryRunner)6 Test (org.testng.annotations.Test)6 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)3 Map (java.util.Map)3 MoreCollectors (com.google.common.collect.MoreCollectors)2 Assertions.assertGreaterThan (io.airlift.testing.Assertions.assertGreaterThan)2 Assertions.assertLessThanOrEqual (io.airlift.testing.Assertions.assertLessThanOrEqual)2 TpchTable.getTables (io.airlift.tpch.TpchTable.getTables)2 DYNAMIC_FILTERING_WAIT_TIME (io.prestosql.SystemSessionProperties.DYNAMIC_FILTERING_WAIT_TIME)2 ENABLE_DYNAMIC_FILTERING (io.prestosql.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING)2 JOIN_DISTRIBUTION_TYPE (io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE)2 BlockedReason (io.prestosql.operator.BlockedReason)2 PARTITION_KEY (io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY)2 HiveQueryRunner.createQueryRunnerWithStateStore (io.prestosql.plugin.hive.HiveQueryRunner.createQueryRunnerWithStateStore)2