Search in sources :

Example 1 with TaskStats

use of io.trino.operator.TaskStats in project trino by trinodb.

the class TaskSystemTable method cursor.

/**
 * Produces a cursor over an in-memory table holding one row per task
 * returned by {@code taskManager.getAllTaskInfo()}.
 *
 * <p>The table is rebuilt from scratch on every call. None of the three
 * parameters is read by this method.
 *
 * @param transactionHandle not used here
 * @param session not used here
 * @param constraint not used here; rows are not filtered
 * @return a cursor over the freshly built record set
 */
@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain<Integer> constraint) {
    Builder rows = InMemoryRecordSet.builder(TASK_TABLE);
    for (TaskInfo info : taskManager.getAllTaskInfo()) {
        TaskStats stats = info.getStats();
        TaskStatus status = info.getTaskStatus();
        rows.addRow(
                // identity and state
                nodeId,
                status.getTaskId().toString(),
                status.getTaskId().getStageId().toString(),
                status.getTaskId().getQueryId().toString(),
                status.getState().toString(),
                // driver counts, widened to long
                (long) stats.getTotalDrivers(),
                (long) stats.getQueuedDrivers(),
                (long) stats.getRunningDrivers(),
                (long) stats.getCompletedDrivers(),
                // timings
                toMillis(stats.getTotalScheduledTime()),
                toMillis(stats.getTotalCpuTime()),
                toMillis(stats.getTotalBlockedTime()),
                // data sizes and position counts
                toBytes(stats.getRawInputDataSize()),
                stats.getRawInputPositions(),
                toBytes(stats.getProcessedInputDataSize()),
                stats.getProcessedInputPositions(),
                toBytes(stats.getOutputDataSize()),
                stats.getOutputPositions(),
                toBytes(stats.getPhysicalInputDataSize()),
                toBytes(stats.getPhysicalWrittenDataSize()),
                // timestamps
                toTimestampWithTimeZoneMillis(stats.getCreateTime()),
                toTimestampWithTimeZoneMillis(stats.getFirstStartTime()),
                toTimestampWithTimeZoneMillis(info.getLastHeartbeat()),
                toTimestampWithTimeZoneMillis(stats.getEndTime()));
    }
    return rows.build().cursor();
}
Also used : TaskInfo(io.trino.execution.TaskInfo) TableMetadataBuilder.tableMetadataBuilder(io.trino.metadata.MetadataUtil.TableMetadataBuilder.tableMetadataBuilder) Builder(io.trino.spi.connector.InMemoryRecordSet.Builder) TaskStats(io.trino.operator.TaskStats) TaskStatus(io.trino.execution.TaskStatus)

Example 2 with TaskStats

use of io.trino.operator.TaskStats in project trino by trinodb.

the class SqlTask method createTaskInfo.

/**
 * Assembles a {@link TaskInfo} for the given holder from the task status,
 * the task stats, the no-more-splits set, and the current heartbeat, output
 * buffer info and needs-plan flag.
 *
 * @param taskHolder holder passed on to the status, stats and no-more-splits helpers
 * @return a newly constructed {@link TaskInfo}
 */
private TaskInfo createTaskInfo(TaskHolder taskHolder) {
    // The status is deliberately captured before the stats: doing it the other
    // way round could pair a "done" task state with stats that are still incomplete.
    TaskStatus status = createTaskStatus(taskHolder);
    TaskStats stats = getTaskStats(taskHolder);
    Set<PlanNodeId> noMoreSplitsSet = getNoMoreSplits(taskHolder);
    return new TaskInfo(
            status,
            lastHeartbeat.get(),
            outputBuffer.getInfo(),
            noMoreSplitsSet,
            stats,
            needsPlan.get());
}
Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) TaskStats(io.trino.operator.TaskStats)

Example 3 with TaskStats

use of io.trino.operator.TaskStats in project trino by trinodb.

the class SqlTask method getTaskStats.

/**
 * Resolves the stats for a task, trying three sources in order: the
 * holder's final task info, the holder's task execution context, and
 * finally a new {@link TaskStats} built from the state machine's created time.
 *
 * @param taskHolder holder queried for the final task info and the task execution
 * @return the stats from the first source that is available
 */
private TaskStats getTaskStats(TaskHolder taskHolder) {
    TaskInfo completedInfo = taskHolder.getFinalTaskInfo();
    if (completedInfo != null) {
        return completedInfo.getStats();
    }

    SqlTaskExecution execution = taskHolder.getTaskExecution();
    if (execution != null) {
        return execution.getTaskContext().getTaskStats();
    }

    // No execution was ever created. If the task is nevertheless done,
    // stamp the end time with the current time; otherwise leave it unset.
    DateTime endTime = null;
    if (taskStateMachine.getState().isDone()) {
        endTime = DateTime.now();
    }
    return new TaskStats(taskStateMachine.getCreatedTime(), endTime);
}
Also used : TaskStats(io.trino.operator.TaskStats) DateTime(org.joda.time.DateTime)

Example 4 with TaskStats

use of io.trino.operator.TaskStats in project trino by trinodb.

the class SqlTask method createTaskStatus.

/**
 * Builds a {@link TaskStatus} for the task described by the given holder.
 *
 * <p>Counters start from zero/empty defaults and are then filled from one of
 * two sources: the holder's final task info when it is present, otherwise the
 * live task execution context when that is present. If neither is present the
 * defaults are reported as-is.
 *
 * @param taskHolder holder queried for the final task info and the task execution
 * @return a newly constructed {@link TaskStatus}
 */
private TaskStatus createTaskStatus(TaskHolder taskHolder) {
    // Obtain task status version before building actual TaskStatus object.
    // This way any task updates won't be lost since all updates happen
    // before version number is increased.
    long versionNumber = taskStatusVersion.get();
    TaskState state = taskStateMachine.getState();
    // Failure details are only collected when the task is in the FAILED state.
    List<ExecutionFailureInfo> failures = ImmutableList.of();
    if (state == FAILED) {
        failures = toFailures(taskStateMachine.getFailureCauses());
    }
    // Defaults reported when neither a final task info nor a task execution exists.
    int queuedPartitionedDrivers = 0;
    long queuedPartitionedSplitsWeight = 0L;
    int runningPartitionedDrivers = 0;
    long runningPartitionedSplitsWeight = 0L;
    DataSize physicalWrittenDataSize = DataSize.ofBytes(0);
    DataSize userMemoryReservation = DataSize.ofBytes(0);
    DataSize revocableMemoryReservation = DataSize.ofBytes(0);
    // TODO: add a mechanism to avoid sending the whole completedDriverGroups set over the wire for every task status reply
    Set<Lifespan> completedDriverGroups = ImmutableSet.of();
    long fullGcCount = 0;
    Duration fullGcTime = new Duration(0, MILLISECONDS);
    long dynamicFiltersVersion = INITIAL_DYNAMIC_FILTERS_VERSION;
    if (taskHolder.getFinalTaskInfo() != null) {
        // Final task info available: copy the values from its stats.
        // completedDriverGroups and dynamicFiltersVersion keep their defaults on this path.
        TaskInfo taskInfo = taskHolder.getFinalTaskInfo();
        TaskStats taskStats = taskInfo.getStats();
        queuedPartitionedDrivers = taskStats.getQueuedPartitionedDrivers();
        queuedPartitionedSplitsWeight = taskStats.getQueuedPartitionedSplitsWeight();
        runningPartitionedDrivers = taskStats.getRunningPartitionedDrivers();
        runningPartitionedSplitsWeight = taskStats.getRunningPartitionedSplitsWeight();
        physicalWrittenDataSize = taskStats.getPhysicalWrittenDataSize();
        userMemoryReservation = taskStats.getUserMemoryReservation();
        revocableMemoryReservation = taskStats.getRevocableMemoryReservation();
        fullGcCount = taskStats.getFullGcCount();
        fullGcTime = taskStats.getFullGcTime();
    } else if (taskHolder.getTaskExecution() != null) {
        // Live execution: sum the partitioned driver counts, split weights and
        // written bytes over every pipeline context of the task.
        long physicalWrittenBytes = 0;
        TaskContext taskContext = taskHolder.getTaskExecution().getTaskContext();
        for (PipelineContext pipelineContext : taskContext.getPipelineContexts()) {
            PipelineStatus pipelineStatus = pipelineContext.getPipelineStatus();
            queuedPartitionedDrivers += pipelineStatus.getQueuedPartitionedDrivers();
            queuedPartitionedSplitsWeight += pipelineStatus.getQueuedPartitionedSplitsWeight();
            runningPartitionedDrivers += pipelineStatus.getRunningPartitionedDrivers();
            runningPartitionedSplitsWeight += pipelineStatus.getRunningPartitionedSplitsWeight();
            physicalWrittenBytes += pipelineContext.getPhysicalWrittenDataSize();
        }
        physicalWrittenDataSize = succinctBytes(physicalWrittenBytes);
        // The remaining values are read directly from the task context.
        userMemoryReservation = taskContext.getMemoryReservation();
        revocableMemoryReservation = taskContext.getRevocableMemoryReservation();
        completedDriverGroups = taskContext.getCompletedDriverGroups();
        fullGcCount = taskContext.getFullGcCount();
        fullGcTime = taskContext.getFullGcTime();
        dynamicFiltersVersion = taskContext.getDynamicFiltersVersion();
    }
    return new TaskStatus(taskStateMachine.getTaskId(), taskInstanceId, versionNumber, state, location, nodeId, completedDriverGroups, failures, queuedPartitionedDrivers, runningPartitionedDrivers, isOutputBufferOverutilized(), physicalWrittenDataSize, userMemoryReservation, revocableMemoryReservation, fullGcCount, fullGcTime, dynamicFiltersVersion, queuedPartitionedSplitsWeight, runningPartitionedSplitsWeight);
}
Also used : PipelineStatus(io.trino.operator.PipelineStatus) TaskContext(io.trino.operator.TaskContext) Duration(io.airlift.units.Duration) TaskStats(io.trino.operator.TaskStats) PipelineContext(io.trino.operator.PipelineContext) DataSize(io.airlift.units.DataSize)

Example 5 with TaskStats

use of io.trino.operator.TaskStats in project trino by trinodb.

the class StageStateMachine method getStageInfo.

/**
 * Builds a {@link StageInfo} for this stage by aggregating the stats of all
 * tasks returned by the supplier.
 *
 * <p>If a final stage info has already been recorded it is returned directly
 * and the supplier is not invoked. Otherwise the per-task counters, sizes,
 * timings and GC figures are summed (with separate "failed" totals for tasks
 * in the {@code FAILED} state) and operator summaries are merged per
 * {@code pipelineId.operatorId}.
 *
 * @param taskInfosSupplier supplies the task infos to aggregate; invoked at most once
 * @return the recorded final stage info if present, otherwise a newly built one
 */
public StageInfo getStageInfo(Supplier<Iterable<TaskInfo>> taskInfosSupplier) {
    Optional<StageInfo> finalStageInfo = this.finalStageInfo.get();
    if (finalStageInfo.isPresent()) {
        return finalStageInfo.get();
    }
    // stage state must be captured first in order to provide a
    // consistent view of the stage. For example, while building this
    // information, the stage could finish, and the task states would
    // never be visible.
    StageState state = stageState.get();
    List<TaskInfo> taskInfos = ImmutableList.copyOf(taskInfosSupplier.get());
    int totalTasks = taskInfos.size();
    int runningTasks = 0;
    int completedTasks = 0;
    int failedTasks = 0;
    int totalDrivers = 0;
    int queuedDrivers = 0;
    int runningDrivers = 0;
    int blockedDrivers = 0;
    int completedDrivers = 0;
    long cumulativeUserMemory = 0;
    long failedCumulativeUserMemory = 0;
    long userMemoryReservation = 0;
    long revocableMemoryReservation = 0;
    long totalMemoryReservation = 0;
    long peakUserMemoryReservation = peakUserMemory.get();
    long peakRevocableMemoryReservation = peakRevocableMemory.get();
    long totalScheduledTime = 0;
    long failedScheduledTime = 0;
    long totalCpuTime = 0;
    long failedCpuTime = 0;
    long totalBlockedTime = 0;
    long physicalInputDataSize = 0;
    long failedPhysicalInputDataSize = 0;
    long physicalInputPositions = 0;
    long failedPhysicalInputPositions = 0;
    long physicalInputReadTime = 0;
    long failedPhysicalInputReadTime = 0;
    long internalNetworkInputDataSize = 0;
    long failedInternalNetworkInputDataSize = 0;
    long internalNetworkInputPositions = 0;
    long failedInternalNetworkInputPositions = 0;
    long rawInputDataSize = 0;
    long failedRawInputDataSize = 0;
    long rawInputPositions = 0;
    long failedRawInputPositions = 0;
    long processedInputDataSize = 0;
    long failedProcessedInputDataSize = 0;
    long processedInputPositions = 0;
    long failedProcessedInputPositions = 0;
    long bufferedDataSize = 0;
    long outputDataSize = 0;
    long failedOutputDataSize = 0;
    long outputPositions = 0;
    long failedOutputPositions = 0;
    long physicalWrittenDataSize = 0;
    long failedPhysicalWrittenDataSize = 0;
    int fullGcCount = 0;
    int fullGcTaskCount = 0;
    // Seeded with the largest int so the running min() below can actually pick up
    // the smallest per-task value; a seed of 0 would pin the minimum at 0 forever.
    int minFullGcSec = Integer.MAX_VALUE;
    int maxFullGcSec = 0;
    int totalFullGcSec = 0;
    boolean fullyBlocked = true;
    Set<BlockedReason> blockedReasons = new HashSet<>();
    Map<String, OperatorStats> operatorToStats = new HashMap<>();
    for (TaskInfo taskInfo : taskInfos) {
        TaskState taskState = taskInfo.getTaskStatus().getState();
        if (taskState.isDone()) {
            completedTasks++;
        } else {
            runningTasks++;
        }
        if (taskState == TaskState.FAILED) {
            failedTasks++;
        }
        TaskStats taskStats = taskInfo.getStats();
        totalDrivers += taskStats.getTotalDrivers();
        queuedDrivers += taskStats.getQueuedDrivers();
        runningDrivers += taskStats.getRunningDrivers();
        blockedDrivers += taskStats.getBlockedDrivers();
        completedDrivers += taskStats.getCompletedDrivers();
        cumulativeUserMemory += taskStats.getCumulativeUserMemory();
        if (taskState == TaskState.FAILED) {
            failedCumulativeUserMemory += taskStats.getCumulativeUserMemory();
        }
        long taskUserMemory = taskStats.getUserMemoryReservation().toBytes();
        long taskRevocableMemory = taskStats.getRevocableMemoryReservation().toBytes();
        userMemoryReservation += taskUserMemory;
        revocableMemoryReservation += taskRevocableMemory;
        totalMemoryReservation += taskUserMemory + taskRevocableMemory;
        totalScheduledTime += taskStats.getTotalScheduledTime().roundTo(NANOSECONDS);
        totalCpuTime += taskStats.getTotalCpuTime().roundTo(NANOSECONDS);
        totalBlockedTime += taskStats.getTotalBlockedTime().roundTo(NANOSECONDS);
        if (taskState == TaskState.FAILED) {
            failedScheduledTime += taskStats.getTotalScheduledTime().roundTo(NANOSECONDS);
            failedCpuTime += taskStats.getTotalCpuTime().roundTo(NANOSECONDS);
        }
        // Only tasks that are still running contribute to the blocked view of the stage.
        if (!taskState.isDone()) {
            fullyBlocked &= taskStats.isFullyBlocked();
            blockedReasons.addAll(taskStats.getBlockedReasons());
        }
        physicalInputDataSize += taskStats.getPhysicalInputDataSize().toBytes();
        physicalInputPositions += taskStats.getPhysicalInputPositions();
        physicalInputReadTime += taskStats.getPhysicalInputReadTime().roundTo(NANOSECONDS);
        internalNetworkInputDataSize += taskStats.getInternalNetworkInputDataSize().toBytes();
        internalNetworkInputPositions += taskStats.getInternalNetworkInputPositions();
        rawInputDataSize += taskStats.getRawInputDataSize().toBytes();
        rawInputPositions += taskStats.getRawInputPositions();
        processedInputDataSize += taskStats.getProcessedInputDataSize().toBytes();
        processedInputPositions += taskStats.getProcessedInputPositions();
        bufferedDataSize += taskInfo.getOutputBuffers().getTotalBufferedBytes();
        outputDataSize += taskStats.getOutputDataSize().toBytes();
        outputPositions += taskStats.getOutputPositions();
        physicalWrittenDataSize += taskStats.getPhysicalWrittenDataSize().toBytes();
        if (taskState == TaskState.FAILED) {
            failedPhysicalInputDataSize += taskStats.getPhysicalInputDataSize().toBytes();
            failedPhysicalInputPositions += taskStats.getPhysicalInputPositions();
            failedPhysicalInputReadTime += taskStats.getPhysicalInputReadTime().roundTo(NANOSECONDS);
            failedInternalNetworkInputDataSize += taskStats.getInternalNetworkInputDataSize().toBytes();
            failedInternalNetworkInputPositions += taskStats.getInternalNetworkInputPositions();
            failedRawInputDataSize += taskStats.getRawInputDataSize().toBytes();
            failedRawInputPositions += taskStats.getRawInputPositions();
            failedProcessedInputDataSize += taskStats.getProcessedInputDataSize().toBytes();
            failedProcessedInputPositions += taskStats.getProcessedInputPositions();
            failedOutputDataSize += taskStats.getOutputDataSize().toBytes();
            failedOutputPositions += taskStats.getOutputPositions();
            failedPhysicalWrittenDataSize += taskStats.getPhysicalWrittenDataSize().toBytes();
        }
        fullGcCount += taskStats.getFullGcCount();
        fullGcTaskCount += taskStats.getFullGcCount() > 0 ? 1 : 0;
        int gcSec = toIntExact(taskStats.getFullGcTime().roundTo(SECONDS));
        totalFullGcSec += gcSec;
        minFullGcSec = min(minFullGcSec, gcSec);
        maxFullGcSec = max(maxFullGcSec, gcSec);
        // Merge operator summaries across tasks, keyed by "<pipelineId>.<operatorId>".
        for (PipelineStats pipeline : taskStats.getPipelines()) {
            for (OperatorStats operatorStats : pipeline.getOperatorSummaries()) {
                String id = pipeline.getPipelineId() + "." + operatorStats.getOperatorId();
                operatorToStats.compute(id, (k, v) -> v == null ? operatorStats : v.add(operatorStats));
            }
        }
    }
    // With no tasks the loop never ran and the seed is still in place; report 0 instead.
    if (taskInfos.isEmpty()) {
        minFullGcSec = 0;
    }
    // Explicit zero guard rather than relying on the NaN/Infinity-to-int conversion
    // that a floating-point division by a zero count would go through.
    int averageFullGcSec = fullGcCount == 0 ? 0 : (int) (1.0 * totalFullGcSec / fullGcCount);
    StageStats stageStats = new StageStats(
            schedulingComplete.get(),
            getSplitDistribution.snapshot(),
            totalTasks,
            runningTasks,
            completedTasks,
            failedTasks,
            totalDrivers,
            queuedDrivers,
            runningDrivers,
            blockedDrivers,
            completedDrivers,
            cumulativeUserMemory,
            failedCumulativeUserMemory,
            succinctBytes(userMemoryReservation),
            succinctBytes(revocableMemoryReservation),
            succinctBytes(totalMemoryReservation),
            succinctBytes(peakUserMemoryReservation),
            succinctBytes(peakRevocableMemoryReservation),
            succinctDuration(totalScheduledTime, NANOSECONDS),
            succinctDuration(failedScheduledTime, NANOSECONDS),
            succinctDuration(totalCpuTime, NANOSECONDS),
            succinctDuration(failedCpuTime, NANOSECONDS),
            succinctDuration(totalBlockedTime, NANOSECONDS),
            fullyBlocked && runningTasks > 0,
            blockedReasons,
            succinctBytes(physicalInputDataSize),
            succinctBytes(failedPhysicalInputDataSize),
            physicalInputPositions,
            failedPhysicalInputPositions,
            succinctDuration(physicalInputReadTime, NANOSECONDS),
            succinctDuration(failedPhysicalInputReadTime, NANOSECONDS),
            succinctBytes(internalNetworkInputDataSize),
            succinctBytes(failedInternalNetworkInputDataSize),
            internalNetworkInputPositions,
            failedInternalNetworkInputPositions,
            succinctBytes(rawInputDataSize),
            succinctBytes(failedRawInputDataSize),
            rawInputPositions,
            failedRawInputPositions,
            succinctBytes(processedInputDataSize),
            succinctBytes(failedProcessedInputDataSize),
            processedInputPositions,
            failedProcessedInputPositions,
            succinctBytes(bufferedDataSize),
            succinctBytes(outputDataSize),
            succinctBytes(failedOutputDataSize),
            outputPositions,
            failedOutputPositions,
            succinctBytes(physicalWrittenDataSize),
            succinctBytes(failedPhysicalWrittenDataSize),
            new StageGcStatistics(stageId.getId(), totalTasks, fullGcTaskCount, minFullGcSec, maxFullGcSec, totalFullGcSec, averageFullGcSec),
            ImmutableList.copyOf(operatorToStats.values()));
    // The failure cause is only attached when the captured stage state is FAILED.
    ExecutionFailureInfo failureInfo = null;
    if (state == FAILED) {
        failureInfo = failureCause.get();
    }
    return new StageInfo(stageId, state, fragment, fragment.getPartitioning().isCoordinatorOnly(), fragment.getTypes(), stageStats, taskInfos, ImmutableList.of(), tables, failureInfo);
}
Also used : PipelineStats(io.trino.operator.PipelineStats) BlockedReason(io.trino.operator.BlockedReason) HashMap(java.util.HashMap) OperatorStats(io.trino.operator.OperatorStats) TaskStats(io.trino.operator.TaskStats) StageGcStatistics(io.trino.spi.eventlistener.StageGcStatistics) HashSet(java.util.HashSet)

Aggregations

TaskStats (io.trino.operator.TaskStats)8 Duration (io.airlift.units.Duration)2 TaskInfo (io.trino.execution.TaskInfo)2 BlockedReason (io.trino.operator.BlockedReason)2 TaskContext (io.trino.operator.TaskContext)2 HashSet (java.util.HashSet)2 DateTime (org.joda.time.DateTime)2 CpuTimer (io.airlift.stats.CpuTimer)1 CpuDuration (io.airlift.stats.CpuTimer.CpuDuration)1 TestingGcMonitor (io.airlift.stats.TestingGcMonitor)1 DataSize (io.airlift.units.DataSize)1 Duration.succinctDuration (io.airlift.units.Duration.succinctDuration)1 Session (io.trino.Session)1 QueryStats (io.trino.execution.QueryStats)1 StageId (io.trino.execution.StageId)1 StageInfo (io.trino.execution.StageInfo)1 TaskId (io.trino.execution.TaskId)1 TaskStateMachine (io.trino.execution.TaskStateMachine)1 TaskStatus (io.trino.execution.TaskStatus)1 MemoryPool (io.trino.memory.MemoryPool)1