use of com.facebook.presto.execution.StageExecutionInfo in project presto by prestodb.
the class PrestoSparkQueryExecutionFactory method createQueryInfo.
private static QueryInfo createQueryInfo(Session session, String query, QueryState queryState, Optional<PlanAndMore> planAndMore, Optional<String> sparkQueueName, Optional<ExecutionFailureInfo> failureInfo, QueryStateTimer queryStateTimer, Optional<StageInfo> rootStage, WarningCollector warningCollector) {
checkArgument(failureInfo.isPresent() || queryState != FAILED, "unexpected query state: %s", queryState);
int peakRunningTasks = 0;
long peakUserMemoryReservationInBytes = 0;
long peakTotalMemoryReservationInBytes = 0;
long peakTaskUserMemoryInBytes = 0;
long peakTaskTotalMemoryInBytes = 0;
long peakNodeTotalMemoryInBytes = 0;
for (StageInfo stageInfo : getAllStages(rootStage)) {
StageExecutionInfo stageExecutionInfo = stageInfo.getLatestAttemptExecutionInfo();
for (TaskInfo taskInfo : stageExecutionInfo.getTasks()) {
// there's no way to know how many tasks were running in parallel in Spark
// for now let's assume that all the tasks were running in parallel
peakRunningTasks++;
long taskPeakUserMemoryInBytes = taskInfo.getStats().getPeakUserMemoryInBytes();
long taskPeakTotalMemoryInBytes = taskInfo.getStats().getPeakTotalMemoryInBytes();
peakUserMemoryReservationInBytes += taskPeakUserMemoryInBytes;
peakTotalMemoryReservationInBytes += taskPeakTotalMemoryInBytes;
peakTaskUserMemoryInBytes = max(peakTaskUserMemoryInBytes, taskPeakUserMemoryInBytes);
peakTaskTotalMemoryInBytes = max(peakTaskTotalMemoryInBytes, taskPeakTotalMemoryInBytes);
peakNodeTotalMemoryInBytes = max(taskInfo.getStats().getPeakNodeTotalMemoryInBytes(), peakNodeTotalMemoryInBytes);
}
}
QueryStats queryStats = QueryStats.create(queryStateTimer, rootStage, peakRunningTasks, succinctBytes(peakUserMemoryReservationInBytes), succinctBytes(peakTotalMemoryReservationInBytes), succinctBytes(peakTaskUserMemoryInBytes), succinctBytes(peakTaskTotalMemoryInBytes), succinctBytes(peakNodeTotalMemoryInBytes), session.getRuntimeStats());
return new QueryInfo(session.getQueryId(), session.toSessionRepresentation(), queryState, new MemoryPoolId("spark-memory-pool"), queryStats.isScheduled(), URI.create("http://fake.invalid/query/" + session.getQueryId()), planAndMore.map(PlanAndMore::getFieldNames).orElse(ImmutableList.of()), query, Optional.empty(), Optional.empty(), queryStats, Optional.empty(), Optional.empty(), ImmutableMap.of(), ImmutableSet.of(), ImmutableMap.of(), ImmutableMap.of(), ImmutableSet.of(), Optional.empty(), false, planAndMore.flatMap(PlanAndMore::getUpdateType).orElse(null), rootStage, failureInfo.orElse(null), failureInfo.map(ExecutionFailureInfo::getErrorCode).orElse(null), warningCollector.getWarnings(), planAndMore.map(PlanAndMore::getInputs).orElse(ImmutableSet.of()), planAndMore.flatMap(PlanAndMore::getOutput), true, sparkQueueName.map(ResourceGroupId::new), planAndMore.flatMap(PlanAndMore::getQueryType), Optional.empty(), Optional.empty(), ImmutableMap.of(), ImmutableSet.of());
}
use of com.facebook.presto.execution.StageExecutionInfo in project presto by prestodb.
the class PrestoSparkQueryExecutionFactory method createStageInfo.
private static StageInfo createStageInfo(QueryId queryId, SubPlan plan, ListMultimap<PlanFragmentId, TaskInfo> taskInfoMap) {
PlanFragmentId planFragmentId = plan.getFragment().getId();
StageId stageId = new StageId(queryId, planFragmentId.getId());
List<TaskInfo> taskInfos = taskInfoMap.get(planFragmentId);
long peakUserMemoryReservationInBytes = 0;
long peakNodeTotalMemoryReservationInBytes = 0;
for (TaskInfo taskInfo : taskInfos) {
long taskPeakUserMemoryInBytes = taskInfo.getStats().getUserMemoryReservationInBytes();
peakUserMemoryReservationInBytes += taskPeakUserMemoryInBytes;
peakNodeTotalMemoryReservationInBytes = max(taskInfo.getStats().getPeakNodeTotalMemoryInBytes(), peakNodeTotalMemoryReservationInBytes);
}
StageExecutionInfo stageExecutionInfo = StageExecutionInfo.create(new StageExecutionId(stageId, 0), // TODO: figure out a way to know what exactly stage has caused a failure
StageExecutionState.FINISHED, Optional.empty(), taskInfos, DateTime.now(), new Distribution().snapshot(), succinctBytes(peakUserMemoryReservationInBytes), succinctBytes(peakNodeTotalMemoryReservationInBytes), 1, 1);
return new StageInfo(stageId, URI.create("http://fake.invalid/stage/" + stageId), Optional.of(plan.getFragment()), stageExecutionInfo, ImmutableList.of(), plan.getChildren().stream().map(child -> createStageInfo(queryId, child, taskInfoMap)).collect(toImmutableList()), false);
}
use of com.facebook.presto.execution.StageExecutionInfo in project presto by prestodb.
the class LegacySqlQueryScheduler method schedule.
private void schedule() {
if (!scheduling.compareAndSet(false, true)) {
// still scheduling the previous batch of stages
return;
}
List<StageExecutionAndScheduler> scheduledStageExecutions = new ArrayList<>();
try (SetThreadName ignored = new SetThreadName("Query-%s", queryStateMachine.getQueryId())) {
Set<StageId> completedStages = new HashSet<>();
List<ExecutionSchedule> sectionExecutionSchedules = new LinkedList<>();
while (!Thread.currentThread().isInterrupted()) {
// remove finished section
sectionExecutionSchedules.removeIf(ExecutionSchedule::isFinished);
// try to pull more section that are ready to be run
List<StreamingPlanSection> sectionsReadyForExecution = getSectionsReadyForExecution();
// all finished
if (sectionsReadyForExecution.isEmpty() && sectionExecutionSchedules.isEmpty()) {
break;
}
List<List<StageExecutionAndScheduler>> sectionStageExecutions = getStageExecutions(sectionsReadyForExecution);
sectionStageExecutions.forEach(scheduledStageExecutions::addAll);
sectionStageExecutions.stream().map(executionInfos -> executionInfos.stream().collect(toImmutableList())).map(executionPolicy::createExecutionSchedule).forEach(sectionExecutionSchedules::add);
while (sectionExecutionSchedules.stream().noneMatch(ExecutionSchedule::isFinished)) {
List<ListenableFuture<?>> blockedStages = new ArrayList<>();
List<StageExecutionAndScheduler> executionsToSchedule = sectionExecutionSchedules.stream().flatMap(schedule -> schedule.getStagesToSchedule().stream()).collect(toImmutableList());
for (StageExecutionAndScheduler stageExecutionAndScheduler : executionsToSchedule) {
SqlStageExecution stageExecution = stageExecutionAndScheduler.getStageExecution();
StageId stageId = stageExecution.getStageExecutionId().getStageId();
stageExecution.beginScheduling();
// perform some scheduling work
ScheduleResult result = stageExecutionAndScheduler.getStageScheduler().schedule();
// Track leaf tasks if partial results are enabled
if (isPartialResultsEnabled(session) && stageExecutionAndScheduler.getStageExecution().getFragment().isLeaf()) {
for (RemoteTask task : result.getNewTasks()) {
partialResultQueryTaskTracker.trackTask(task);
task.addFinalTaskInfoListener(partialResultQueryTaskTracker::recordTaskFinish);
}
}
// modify parent and children based on the results of the scheduling
if (result.isFinished()) {
stageExecution.schedulingComplete();
} else if (!result.getBlocked().isDone()) {
blockedStages.add(result.getBlocked());
}
stageExecutionAndScheduler.getStageLinkage().processScheduleResults(stageExecution.getState(), result.getNewTasks());
schedulerStats.getSplitsScheduledPerIteration().add(result.getSplitsScheduled());
if (result.getBlockedReason().isPresent()) {
switch(result.getBlockedReason().get()) {
case WRITER_SCALING:
// no-op
break;
case WAITING_FOR_SOURCE:
schedulerStats.getWaitingForSource().update(1);
break;
case SPLIT_QUEUES_FULL:
schedulerStats.getSplitQueuesFull().update(1);
break;
case MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE:
schedulerStats.getMixedSplitQueuesFullAndWaitingForSource().update(1);
break;
case NO_ACTIVE_DRIVER_GROUP:
schedulerStats.getNoActiveDriverGroup().update(1);
break;
default:
throw new UnsupportedOperationException("Unknown blocked reason: " + result.getBlockedReason().get());
}
}
}
// make sure to update stage linkage at least once per loop to catch async state changes (e.g., partial cancel)
boolean stageFinishedExecution = false;
for (StageExecutionAndScheduler stageExecutionInfo : scheduledStageExecutions) {
SqlStageExecution stageExecution = stageExecutionInfo.getStageExecution();
StageId stageId = stageExecution.getStageExecutionId().getStageId();
if (!completedStages.contains(stageId) && stageExecution.getState().isDone()) {
stageExecutionInfo.getStageLinkage().processScheduleResults(stageExecution.getState(), ImmutableSet.of());
completedStages.add(stageId);
stageFinishedExecution = true;
}
}
// if any stage has just finished execution try to pull more sections for scheduling
if (stageFinishedExecution) {
break;
}
// wait for a state change and then schedule again
if (!blockedStages.isEmpty()) {
try (TimeStat.BlockTimer timer = schedulerStats.getSleepTime().time()) {
tryGetFutureValue(whenAnyComplete(blockedStages), 1, SECONDS);
}
for (ListenableFuture<?> blockedStage : blockedStages) {
blockedStage.cancel(true);
}
}
}
}
for (StageExecutionAndScheduler stageExecutionInfo : scheduledStageExecutions) {
StageExecutionState state = stageExecutionInfo.getStageExecution().getState();
if (state != SCHEDULED && state != RUNNING && !state.isDone()) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Scheduling is complete, but stage execution %s is in state %s", stageExecutionInfo.getStageExecution().getStageExecutionId(), state));
}
}
scheduling.set(false);
// Inform the tracker that task scheduling has completed
partialResultQueryTaskTracker.completeTaskScheduling();
if (!getSectionsReadyForExecution().isEmpty()) {
startScheduling();
}
} catch (Throwable t) {
scheduling.set(false);
queryStateMachine.transitionToFailed(t);
throw t;
} finally {
RuntimeException closeError = new RuntimeException();
for (StageExecutionAndScheduler stageExecutionInfo : scheduledStageExecutions) {
try {
stageExecutionInfo.getStageScheduler().close();
} catch (Throwable t) {
queryStateMachine.transitionToFailed(t);
// Self-suppression not permitted
if (closeError != t) {
closeError.addSuppressed(t);
}
}
}
if (closeError.getSuppressed().length > 0) {
throw closeError;
}
}
}
use of com.facebook.presto.execution.StageExecutionInfo in project presto by prestodb.
the class SqlQueryScheduler method buildStageInfo.
private StageInfo buildStageInfo(SubPlan subPlan, ListMultimap<StageId, SqlStageExecution> stageExecutions) {
StageId stageId = getStageId(subPlan.getFragment().getId());
List<SqlStageExecution> attempts = stageExecutions.get(stageId);
StageExecutionInfo latestAttemptInfo = attempts.isEmpty() ? unscheduledExecutionInfo(stageId.getId(), queryStateMachine.isDone()) : getLast(attempts).getStageExecutionInfo();
List<StageExecutionInfo> previousAttemptInfos = attempts.size() < 2 ? ImmutableList.of() : attempts.subList(0, attempts.size() - 1).stream().map(SqlStageExecution::getStageExecutionInfo).collect(toImmutableList());
return new StageInfo(stageId, locationFactory.createStageLocation(stageId), Optional.of(subPlan.getFragment()), latestAttemptInfo, previousAttemptInfos, subPlan.getChildren().stream().map(plan -> buildStageInfo(plan, stageExecutions)).collect(toImmutableList()), runtimeOptimizedStages.contains(stageId));
}
use of com.facebook.presto.execution.StageExecutionInfo in project presto by prestodb.
the class QueryMonitor method computeStageStatistics.
private static void computeStageStatistics(StageInfo stageInfo, ImmutableList.Builder<StageStatistics> stageStatisticsBuilder) {
Distribution cpuDistribution = new Distribution();
Distribution memoryDistribution = new Distribution();
StageExecutionInfo executionInfo = stageInfo.getLatestAttemptExecutionInfo();
for (TaskInfo taskInfo : executionInfo.getTasks()) {
cpuDistribution.add(NANOSECONDS.toMillis(taskInfo.getStats().getTotalCpuTimeInNanos()));
memoryDistribution.add(taskInfo.getStats().getPeakTotalMemoryInBytes());
}
stageStatisticsBuilder.add(new StageStatistics(stageInfo.getStageId().getId(), executionInfo.getStats().getGcInfo().getStageExecutionId(), executionInfo.getTasks().size(), executionInfo.getStats().getTotalScheduledTime(), executionInfo.getStats().getTotalCpuTime(), executionInfo.getStats().getRetriedCpuTime(), executionInfo.getStats().getTotalBlockedTime(), executionInfo.getStats().getRawInputDataSize(), executionInfo.getStats().getProcessedInputDataSize(), executionInfo.getStats().getPhysicalWrittenDataSize(), executionInfo.getStats().getGcInfo(), createResourceDistribution(cpuDistribution.snapshot()), createResourceDistribution(memoryDistribution.snapshot())));
stageInfo.getSubStages().forEach(subStage -> computeStageStatistics(subStage, stageStatisticsBuilder));
}
Aggregations