use of com.facebook.presto.operator.OperatorStats in project presto by prestodb.
the class QueryMonitor method queryCompletedEvent.
public void queryCompletedEvent(QueryInfo queryInfo) {
try {
Optional<QueryFailureInfo> queryFailureInfo = Optional.empty();
if (queryInfo.getFailureInfo() != null) {
FailureInfo failureInfo = queryInfo.getFailureInfo();
Optional<TaskInfo> failedTask = queryInfo.getOutputStage().flatMap(QueryMonitor::findFailedTask);
queryFailureInfo = Optional.of(new QueryFailureInfo(queryInfo.getErrorCode(), Optional.ofNullable(failureInfo.getType()), Optional.ofNullable(failureInfo.getMessage()), failedTask.map(task -> task.getTaskStatus().getTaskId().toString()), failedTask.map(task -> task.getTaskStatus().getSelf().getHost()), objectMapper.writeValueAsString(queryInfo.getFailureInfo())));
}
ImmutableList.Builder<QueryInputMetadata> inputs = ImmutableList.builder();
for (Input input : queryInfo.getInputs()) {
inputs.add(new QueryInputMetadata(input.getConnectorId().getCatalogName(), input.getSchema(), input.getTable(), input.getColumns().stream().map(Column::toString).collect(Collectors.toList()), input.getConnectorInfo()));
}
QueryStats queryStats = queryInfo.getQueryStats();
Optional<QueryOutputMetadata> output = Optional.empty();
if (queryInfo.getOutput().isPresent()) {
Optional<TableFinishInfo> tableFinishInfo = queryStats.getOperatorSummaries().stream().map(OperatorStats::getInfo).filter(TableFinishInfo.class::isInstance).map(TableFinishInfo.class::cast).findFirst();
output = Optional.of(new QueryOutputMetadata(queryInfo.getOutput().get().getConnectorId().getCatalogName(), queryInfo.getOutput().get().getSchema(), queryInfo.getOutput().get().getTable(), tableFinishInfo.map(TableFinishInfo::getConnectorOutputMetadata), tableFinishInfo.map(TableFinishInfo::isJsonLengthLimitExceeded)));
}
eventListenerManager.queryCompleted(new QueryCompletedEvent(new QueryMetadata(queryInfo.getQueryId().toString(), queryInfo.getSession().getTransactionId().map(TransactionId::toString), queryInfo.getQuery(), queryInfo.getState().toString(), queryInfo.getSelf(), queryInfo.getOutputStage().flatMap(stage -> stageInfoCodec.toJsonWithLengthLimit(stage, toIntExact(config.getMaxOutputStageJsonSize().toBytes())))), new QueryStatistics(ofMillis(queryStats.getTotalCpuTime().toMillis()), ofMillis(queryStats.getTotalScheduledTime().toMillis()), ofMillis(queryStats.getQueuedTime().toMillis()), Optional.ofNullable(queryStats.getAnalysisTime()).map(duration -> ofMillis(duration.toMillis())), Optional.ofNullable(queryStats.getDistributedPlanningTime()).map(duration -> ofMillis(duration.toMillis())), queryStats.getPeakMemoryReservation().toBytes(), queryStats.getRawInputDataSize().toBytes(), queryStats.getRawInputPositions(), queryStats.getCompletedDrivers(), queryInfo.isCompleteInfo(), objectMapper.writeValueAsString(queryInfo.getQueryStats().getOperatorSummaries())), new QueryContext(queryInfo.getSession().getUser(), queryInfo.getSession().getPrincipal(), queryInfo.getSession().getRemoteUserAddress(), queryInfo.getSession().getUserAgent(), queryInfo.getSession().getClientInfo(), queryInfo.getSession().getSource(), queryInfo.getSession().getCatalog(), queryInfo.getSession().getSchema(), mergeSessionAndCatalogProperties(queryInfo), serverAddress, serverVersion, environment), new QueryIOMetadata(inputs.build(), output), queryFailureInfo, ofEpochMilli(queryStats.getCreateTime().getMillis()), ofEpochMilli(queryStats.getExecutionStartTime().getMillis()), ofEpochMilli(queryStats.getEndTime().getMillis())));
logQueryTimeline(queryInfo);
} catch (JsonProcessingException e) {
throw Throwables.propagate(e);
}
}
use of com.facebook.presto.operator.OperatorStats in project presto by prestodb.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method testJoinWithEmptyBuildSide.
@Test
public void testJoinWithEmptyBuildSide() {
Session session = Session.builder(getSession()).setSystemProperty(JOIN_DISTRIBUTION_TYPE, FeaturesConfig.JoinDistributionType.BROADCAST.name()).setSystemProperty(PUSHDOWN_SUBFIELDS_ENABLED, "false").setCatalogSessionProperty(HIVE_CATALOG, PUSHDOWN_FILTER_ENABLED, "false").build();
DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId(session, "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.totalprice = 123.4567");
assertEquals(result.getResult().getRowCount(), 0);
OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "lineitem");
// Probe-side is not scanned at all, due to dynamic filtering:
assertEquals(probeStats.getInputPositions(), 0L);
}
use of com.facebook.presto.operator.OperatorStats in project presto by prestodb.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method searchScanFilterAndProjectOperatorStats.
private OperatorStats searchScanFilterAndProjectOperatorStats(QueryId queryId, String tableName) {
DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
Plan plan = runner.getQueryPlan(queryId);
PlanNodeId nodeId = PlanNodeSearcher.searchFrom(plan.getRoot()).where(node -> {
if (!(node instanceof ProjectNode)) {
return false;
}
ProjectNode projectNode = (ProjectNode) node;
FilterNode filterNode = (FilterNode) projectNode.getSource();
TableScanNode tableScanNode = (TableScanNode) filterNode.getSource();
return tableName.equals(((HiveTableHandle) (tableScanNode.getTable().getConnectorHandle())).getTableName());
}).findOnlyElement().getId();
return runner.getCoordinator().getQueryManager().getFullQueryInfo(queryId).getQueryStats().getOperatorSummaries().stream().filter(summary -> nodeId.equals(summary.getPlanNodeId())).collect(MoreCollectors.onlyElement());
}
use of com.facebook.presto.operator.OperatorStats in project presto by prestodb.
the class PlanNodeStatsSummarizer method getPlanNodeStats.
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
// Best effort to reconstruct the plan nodes from operators.
// Because stats are collected separately from query execution,
// it's possible that some or all of them are missing or out of date.
// For example, a LIMIT clause can cause a query to finish before stats
// are collected from the leaf stages.
Set<PlanNodeId> planNodeIds = new HashSet<>();
Map<PlanNodeId, Long> planNodeInputPositions = new HashMap<>();
Map<PlanNodeId, Long> planNodeInputBytes = new HashMap<>();
Map<PlanNodeId, Long> planNodeRawInputPositions = new HashMap<>();
Map<PlanNodeId, Long> planNodeRawInputBytes = new HashMap<>();
Map<PlanNodeId, Long> planNodeOutputPositions = new HashMap<>();
Map<PlanNodeId, Long> planNodeOutputBytes = new HashMap<>();
Map<PlanNodeId, Long> planNodeScheduledMillis = new HashMap<>();
Map<PlanNodeId, Long> planNodeCpuMillis = new HashMap<>();
Map<PlanNodeId, Map<String, OperatorInputStats>> operatorInputStats = new HashMap<>();
Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> operatorHashCollisionsStats = new HashMap<>();
Map<PlanNodeId, WindowOperatorStats> windowNodeStats = new HashMap<>();
for (PipelineStats pipelineStats : taskStats.getPipelines()) {
// Due to eventual consistently collected stats, these could be empty
if (pipelineStats.getOperatorSummaries().isEmpty()) {
continue;
}
Set<PlanNodeId> processedNodes = new HashSet<>();
PlanNodeId inputPlanNode = pipelineStats.getOperatorSummaries().iterator().next().getPlanNodeId();
PlanNodeId outputPlanNode = getLast(pipelineStats.getOperatorSummaries()).getPlanNodeId();
// Gather input statistics
for (OperatorStats operatorStats : pipelineStats.getOperatorSummaries()) {
PlanNodeId planNodeId = operatorStats.getPlanNodeId();
planNodeIds.add(planNodeId);
long scheduledMillis = operatorStats.getAddInputWall().toMillis() + operatorStats.getGetOutputWall().toMillis() + operatorStats.getFinishWall().toMillis();
planNodeScheduledMillis.merge(planNodeId, scheduledMillis, Long::sum);
long cpuMillis = operatorStats.getAddInputCpu().toMillis() + operatorStats.getGetOutputCpu().toMillis() + operatorStats.getFinishCpu().toMillis();
planNodeCpuMillis.merge(planNodeId, cpuMillis, Long::sum);
// A pipeline like hash build before join might link to another "internal" pipelines which provide actual input for this plan node
if (operatorStats.getPlanNodeId().equals(inputPlanNode) && !pipelineStats.isInputPipeline()) {
continue;
}
if (processedNodes.contains(planNodeId)) {
continue;
}
operatorInputStats.merge(planNodeId, ImmutableMap.of(operatorStats.getOperatorType(), new OperatorInputStats(operatorStats.getTotalDrivers(), operatorStats.getInputPositions(), operatorStats.getSumSquaredInputPositions())), (map1, map2) -> mergeMaps(map1, map2, OperatorInputStats::merge));
if (operatorStats.getInfo() instanceof HashCollisionsInfo) {
HashCollisionsInfo hashCollisionsInfo = (HashCollisionsInfo) operatorStats.getInfo();
operatorHashCollisionsStats.merge(planNodeId, ImmutableMap.of(operatorStats.getOperatorType(), new OperatorHashCollisionsStats(hashCollisionsInfo.getWeightedHashCollisions(), hashCollisionsInfo.getWeightedSumSquaredHashCollisions(), hashCollisionsInfo.getWeightedExpectedHashCollisions())), (map1, map2) -> mergeMaps(map1, map2, OperatorHashCollisionsStats::merge));
}
// The only statistics we have for Window Functions are very low level, thus displayed only in VERBOSE mode
if (operatorStats.getInfo() instanceof WindowInfo) {
WindowInfo windowInfo = (WindowInfo) operatorStats.getInfo();
windowNodeStats.merge(planNodeId, WindowOperatorStats.create(windowInfo), (left, right) -> left.mergeWith(right));
}
planNodeInputPositions.merge(planNodeId, operatorStats.getInputPositions(), Long::sum);
planNodeInputBytes.merge(planNodeId, operatorStats.getInputDataSize().toBytes(), Long::sum);
planNodeRawInputPositions.merge(planNodeId, operatorStats.getRawInputPositions(), Long::sum);
planNodeRawInputBytes.merge(planNodeId, operatorStats.getRawInputDataSize().toBytes(), Long::sum);
processedNodes.add(planNodeId);
}
// Gather output statistics
processedNodes.clear();
for (OperatorStats operatorStats : reverse(pipelineStats.getOperatorSummaries())) {
PlanNodeId planNodeId = operatorStats.getPlanNodeId();
// An "internal" pipeline like a hash build, links to another pipeline which is the actual output for this plan node
if (operatorStats.getPlanNodeId().equals(outputPlanNode) && !pipelineStats.isOutputPipeline()) {
continue;
}
if (processedNodes.contains(planNodeId)) {
continue;
}
planNodeOutputPositions.merge(planNodeId, operatorStats.getOutputPositions(), Long::sum);
planNodeOutputBytes.merge(planNodeId, operatorStats.getOutputDataSize().toBytes(), Long::sum);
processedNodes.add(planNodeId);
}
}
List<PlanNodeStats> stats = new ArrayList<>();
for (PlanNodeId planNodeId : planNodeIds) {
if (!planNodeInputPositions.containsKey(planNodeId)) {
continue;
}
PlanNodeStats nodeStats;
// It's possible there will be no output stats because all the pipelines that we observed were non-output.
// For example in a query like SELECT * FROM a JOIN b ON c = d LIMIT 1
// It's possible to observe stats after the build starts, but before the probe does
// and therefore only have scheduled time, but no output stats
long outputPositions = planNodeOutputPositions.getOrDefault(planNodeId, 0L);
if (operatorHashCollisionsStats.containsKey(planNodeId)) {
nodeStats = new HashCollisionPlanNodeStats(planNodeId, new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS), new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS), planNodeInputPositions.get(planNodeId), succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE), planNodeRawInputPositions.get(planNodeId), succinctDataSize(planNodeRawInputBytes.get(planNodeId), BYTE), outputPositions, succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE), operatorInputStats.get(planNodeId), operatorHashCollisionsStats.get(planNodeId));
} else if (windowNodeStats.containsKey(planNodeId)) {
nodeStats = new WindowPlanNodeStats(planNodeId, new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS), new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS), planNodeInputPositions.get(planNodeId), succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE), planNodeRawInputPositions.get(planNodeId), succinctDataSize(planNodeRawInputBytes.get(planNodeId), BYTE), outputPositions, succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE), operatorInputStats.get(planNodeId), windowNodeStats.get(planNodeId));
} else {
nodeStats = new PlanNodeStats(planNodeId, new Duration(planNodeScheduledMillis.get(planNodeId), MILLISECONDS), new Duration(planNodeCpuMillis.get(planNodeId), MILLISECONDS), planNodeInputPositions.get(planNodeId), succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE), planNodeRawInputPositions.get(planNodeId), succinctDataSize(planNodeRawInputBytes.get(planNodeId), BYTE), outputPositions, succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE), operatorInputStats.get(planNodeId));
}
stats.add(nodeStats);
}
return stats;
}
use of com.facebook.presto.operator.OperatorStats in project presto by prestodb.
the class PlanPrinter method getPlanNodeStats.
private static List<PlanNodeStats> getPlanNodeStats(TaskStats taskStats) {
// Best effort to reconstruct the plan nodes from operators.
// Because stats are collected separately from query execution,
// it's possible that some or all of them are missing or out of date.
// For example, a LIMIT clause can cause a query to finish before stats
// are collected from the leaf stages.
Map<PlanNodeId, Long> planNodeInputPositions = new HashMap<>();
Map<PlanNodeId, Long> planNodeInputBytes = new HashMap<>();
Map<PlanNodeId, Long> planNodeOutputPositions = new HashMap<>();
Map<PlanNodeId, Long> planNodeOutputBytes = new HashMap<>();
Map<PlanNodeId, Long> planNodeWallMillis = new HashMap<>();
Map<PlanNodeId, Map<String, OperatorInputStats>> operatorInputStats = new HashMap<>();
Map<PlanNodeId, Map<String, OperatorHashCollisionsStats>> operatorHashCollisionsStats = new HashMap<>();
for (PipelineStats pipelineStats : taskStats.getPipelines()) {
// Due to eventual consistently collected stats, these could be empty
if (pipelineStats.getOperatorSummaries().isEmpty()) {
continue;
}
Set<PlanNodeId> processedNodes = new HashSet<>();
PlanNodeId inputPlanNode = pipelineStats.getOperatorSummaries().iterator().next().getPlanNodeId();
PlanNodeId outputPlanNode = getLast(pipelineStats.getOperatorSummaries()).getPlanNodeId();
// Gather input statistics
for (OperatorStats operatorStats : pipelineStats.getOperatorSummaries()) {
PlanNodeId planNodeId = operatorStats.getPlanNodeId();
long wall = operatorStats.getAddInputWall().toMillis() + operatorStats.getGetOutputWall().toMillis() + operatorStats.getFinishWall().toMillis();
planNodeWallMillis.merge(planNodeId, wall, Long::sum);
// A pipeline like hash build before join might link to another "internal" pipelines which provide actual input for this plan node
if (operatorStats.getPlanNodeId().equals(inputPlanNode) && !pipelineStats.isInputPipeline()) {
continue;
}
if (processedNodes.contains(planNodeId)) {
continue;
}
operatorInputStats.merge(planNodeId, ImmutableMap.of(operatorStats.getOperatorType(), new OperatorInputStats(operatorStats.getTotalDrivers(), operatorStats.getInputPositions(), operatorStats.getSumSquaredInputPositions())), PlanPrinter::mergeOperatorInputStatsMaps);
if (operatorStats.getInfo() instanceof HashCollisionsInfo) {
HashCollisionsInfo hashCollisionsInfo = (HashCollisionsInfo) operatorStats.getInfo();
operatorHashCollisionsStats.merge(planNodeId, ImmutableMap.of(operatorStats.getOperatorType(), new OperatorHashCollisionsStats(hashCollisionsInfo.getWeightedHashCollisions(), hashCollisionsInfo.getWeightedSumSquaredHashCollisions(), hashCollisionsInfo.getWeightedExpectedHashCollisions())), PlanPrinter::mergeOperatorHashCollisionsStatsMaps);
}
planNodeInputPositions.merge(planNodeId, operatorStats.getInputPositions(), Long::sum);
planNodeInputBytes.merge(planNodeId, operatorStats.getInputDataSize().toBytes(), Long::sum);
processedNodes.add(planNodeId);
}
// Gather output statistics
processedNodes.clear();
for (OperatorStats operatorStats : reverse(pipelineStats.getOperatorSummaries())) {
PlanNodeId planNodeId = operatorStats.getPlanNodeId();
// An "internal" pipeline like a hash build, links to another pipeline which is the actual output for this plan node
if (operatorStats.getPlanNodeId().equals(outputPlanNode) && !pipelineStats.isOutputPipeline()) {
continue;
}
if (processedNodes.contains(planNodeId)) {
continue;
}
planNodeOutputPositions.merge(planNodeId, operatorStats.getOutputPositions(), Long::sum);
planNodeOutputBytes.merge(planNodeId, operatorStats.getOutputDataSize().toBytes(), Long::sum);
processedNodes.add(planNodeId);
}
}
List<PlanNodeStats> stats = new ArrayList<>();
for (Map.Entry<PlanNodeId, Long> entry : planNodeWallMillis.entrySet()) {
PlanNodeId planNodeId = entry.getKey();
stats.add(new PlanNodeStats(planNodeId, new Duration(planNodeWallMillis.get(planNodeId), MILLISECONDS), planNodeInputPositions.get(planNodeId), succinctDataSize(planNodeInputBytes.get(planNodeId), BYTE), // and therefore only have wall time, but no output stats
planNodeOutputPositions.getOrDefault(planNodeId, 0L), succinctDataSize(planNodeOutputBytes.getOrDefault(planNodeId, 0L), BYTE), operatorInputStats.get(planNodeId), // Only some operators emit hash collisions statistics
operatorHashCollisionsStats.getOrDefault(planNodeId, emptyMap())));
}
return stats;
}
Aggregations