Search in sources :

Example 1 with ExecutionFailureInfo

use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.

the class FaultTolerantStageScheduler method updateTaskStatus.

/**
 * Handles a task status update once the task has reached a done state.
 *
 * <p>Updates that are not in a done state are ignored. For a done task, the task is removed
 * from the running set, its node lease is released, and (unless the partition is already
 * finished or this scheduler is closed) the terminal state is processed:
 * <ul>
 * <li>{@code FINISHED}: the partition is recorded as finished, the sink exchange (if any) is
 * notified, and all remote tasks registered for the partition are aborted;</li>
 * <li>{@code CANCELED} / {@code ABORTED}: only logged;</li>
 * <li>{@code FAILED}: the partition is re-queued while retry budget remains and the error is
 * not a {@code USER_ERROR}; otherwise the failure is passed to {@code fail}.</li>
 * </ul>
 * Any {@link Throwable} raised while processing is passed to {@code fail} as well.
 *
 * @param taskStatus status reported for the task
 * @param exchangeSinkInstanceHandle sink instance handle of the task; must be present when the
 *         task finished and a sink exchange is configured
 */
private void updateTaskStatus(TaskStatus taskStatus, Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle) {
    TaskState state = taskStatus.getState();
    if (!state.isDone()) {
        return;
    }
    try {
        RuntimeException failure = null;
        SettableFuture<Void> future;
        synchronized (this) {
            TaskId taskId = taskStatus.getTaskId();
            runningTasks.remove(taskId);
            // capture the current future so it can be completed after the lock is released;
            // a fresh one is installed only while other tasks are still running
            future = taskFinishedFuture;
            if (!runningTasks.isEmpty()) {
                taskFinishedFuture = SettableFuture.create();
            } else {
                taskFinishedFuture = null;
            }
            NodeAllocator.NodeLease nodeLease = requireNonNull(runningNodes.remove(taskId), () -> "node not found for task id: " + taskId);
            nodeLease.release();
            int partitionId = taskId.getPartitionId();
            // terminal states are processed only for partitions not yet finished, and only while not closed
            if (!finishedPartitions.contains(partitionId) && !closed) {
                switch(state) {
                    case FINISHED:
                        finishedPartitions.add(partitionId);
                        if (sinkExchange.isPresent()) {
                            checkArgument(exchangeSinkInstanceHandle.isPresent(), "exchangeSinkInstanceHandle is expected to be present");
                            sinkExchange.get().sinkFinished(exchangeSinkInstanceHandle.get());
                        }
                        // abort every remote task registered for this partition
                        partitionToRemoteTaskMap.get(partitionId).forEach(RemoteTask::abort);
                        break;
                    case CANCELED:
                        log.debug("Task cancelled: %s", taskId);
                        break;
                    case ABORTED:
                        log.debug("Task aborted: %s", taskId);
                        break;
                    case FAILED:
                        // build the fallback failure lazily: it is needed only when the task reported no failures
                        ExecutionFailureInfo failureInfo = taskStatus.getFailures().stream()
                                .findFirst()
                                .map(this::rewriteTransportFailure)
                                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "A task failed for an unknown reason")));
                        log.warn(failureInfo.toException(), "Task failed: %s", taskId);
                        ErrorCode errorCode = failureInfo.getErrorCode();
                        int taskRemainingAttempts = remainingAttemptsPerTask.getOrDefault(partitionId, maxRetryAttemptsPerTask);
                        // retry only while both the overall and the per-partition budgets remain, and never for user errors
                        if (remainingRetryAttemptsOverall > 0 && taskRemainingAttempts > 0 && (errorCode == null || errorCode.getType() != USER_ERROR)) {
                            remainingRetryAttemptsOverall--;
                            remainingAttemptsPerTask.put(partitionId, taskRemainingAttempts - 1);
                            // update memory limits for next attempt
                            MemoryRequirements memoryLimits = partitionMemoryRequirements.get(partitionId);
                            verify(memoryLimits != null);
                            MemoryRequirements newMemoryLimits = partitionMemoryEstimator.getNextRetryMemoryRequirements(session, memoryLimits, errorCode);
                            partitionMemoryRequirements.put(partitionId, newMemoryLimits);
                            // reschedule
                            queuedPartitions.add(partitionId);
                            log.debug("Retrying partition %s for stage %s", partitionId, stage.getStageId());
                        } else {
                            failure = failureInfo.toException();
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected task state: " + state);
                }
            }
        }
        if (failure != null) {
            // must be called outside the lock
            fail(failure);
        }
        // complete the future captured under the lock, if there was one
        if (future != null && !future.isDone()) {
            future.set(null);
        }
    } catch (Throwable t) {
        fail(t);
    }
}
Also used : TaskId(io.trino.execution.TaskId) MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) RemoteTask(io.trino.execution.RemoteTask) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) TrinoException(io.trino.spi.TrinoException) ErrorCode(io.trino.spi.ErrorCode) MoreFutures.asVoid(io.airlift.concurrent.MoreFutures.asVoid) TaskState(io.trino.execution.TaskState)

Example 2 with ExecutionFailureInfo

use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.

the class FailedDispatchQuery method immediateFailureQueryInfo.

/**
 * Builds the {@link QueryInfo} for a query in the {@code FAILED} state, using the failure
 * converted from {@code throwable} and that failure's error code.
 *
 * @param session session the query id and session representation are taken from
 * @param query query text
 * @param preparedQuery prepared query text, if any
 * @param self URI passed through to the resulting {@link QueryInfo}
 * @param resourceGroupId resource group of the query, if any
 * @param throwable cause of the failure
 * @return the failed query info
 */
private static QueryInfo immediateFailureQueryInfo(Session session, String query, Optional<String> preparedQuery, URI self, Optional<ResourceGroupId> resourceGroupId, Throwable throwable) {
    ExecutionFailureInfo failure = toFailure(throwable);
    // arguments are positional; one per line to keep the long constructor call reviewable
    return new QueryInfo(
            session.getQueryId(),
            session.toSessionRepresentation(),
            QueryState.FAILED,
            false,
            self,
            ImmutableList.of(),
            query,
            preparedQuery,
            immediateFailureQueryStats(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            ImmutableMap.of(),
            ImmutableSet.of(),
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableSet.of(),
            Optional.empty(),
            false,
            null,
            Optional.empty(),
            failure,
            failure.getErrorCode(),
            ImmutableList.of(),
            ImmutableSet.of(),
            Optional.empty(),
            ImmutableList.of(),
            ImmutableList.of(),
            true,
            resourceGroupId,
            Optional.empty());
}
Also used : BasicQueryInfo(io.trino.server.BasicQueryInfo) QueryInfo(io.trino.execution.QueryInfo) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo)

Example 3 with ExecutionFailureInfo

use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.

the class LocalDispatchQuery method getDispatchInfo.

/**
 * Reports the dispatch status of this query: failed, dispatched, or still queued.
 *
 * @return failed info (with the recorded failure, or a generic internal error when none is
 *         recorded) if the query state is {@code FAILED}; otherwise dispatched info if
 *         submission has completed; otherwise queued info
 */
@Override
public DispatchInfo getDispatchInfo() {
    // read the submission flag first, then the state, so that a failed query stat is visible
    boolean submissionDone = submitted.isDone();
    BasicQueryInfo basicInfo = stateMachine.getBasicQueryInfo(Optional.empty());

    if (basicInfo.getState() == QueryState.FAILED) {
        ExecutionFailureInfo cause = stateMachine.getFailureInfo()
                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "Query failed for an unknown reason")));
        return DispatchInfo.failed(
                cause,
                basicInfo.getQueryStats().getElapsedTime(),
                basicInfo.getQueryStats().getQueuedTime());
    }

    if (!submissionDone) {
        return DispatchInfo.queued(
                basicInfo.getQueryStats().getElapsedTime(),
                basicInfo.getQueryStats().getQueuedTime());
    }

    return DispatchInfo.dispatched(
            new LocalCoordinatorLocation(),
            basicInfo.getQueryStats().getElapsedTime(),
            basicInfo.getQueryStats().getQueuedTime());
}
Also used : BasicQueryInfo(io.trino.server.BasicQueryInfo) TrinoException(io.trino.spi.TrinoException) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo)

Example 4 with ExecutionFailureInfo

use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.

the class TestFailures method testToFailureLoop.

/**
 * Checks that {@code toFailure} handles exception graphs containing cycles through
 * cause and suppressed links, asserting the message, cause, suppressed list and
 * error code of the converted failure in each scenario.
 */
@Test
public void testToFailureLoop() {
    // cycle: trino exception -> suppressed (wrapper) -> cause (trino exception)
    Throwable trinoRoot = new TrinoException(TOO_MANY_REQUESTS_FAILED, "fake exception 1");
    Throwable wrapper = new RuntimeException("fake exception 2", trinoRoot);
    trinoRoot.addSuppressed(wrapper);

    // add exception 1 --> add suppress (exception 2) --> add cause (exception 1)
    ExecutionFailureInfo fromRoot = toFailure(trinoRoot);
    assertEquals(fromRoot.getMessage(), "fake exception 1");
    assertNull(fromRoot.getCause());
    assertEquals(fromRoot.getSuppressed().size(), 1);
    assertEquals(fromRoot.getSuppressed().get(0).getMessage(), "fake exception 2");
    assertEquals(fromRoot.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // add exception 2 --> add cause (exception 2) --> add suppress (exception 1)
    ExecutionFailureInfo fromWrapper = toFailure(wrapper);
    assertEquals(fromWrapper.getMessage(), "fake exception 2");
    assertNotNull(fromWrapper.getCause());
    assertEquals(fromWrapper.getCause().getMessage(), "fake exception 1");
    assertEquals(fromWrapper.getSuppressed().size(), 0);
    assertEquals(fromWrapper.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // add exception 1 --> add suppress (exception 2) --> add suppress (exception 1)
    Throwable suppressingTrino = new TrinoException(TOO_MANY_REQUESTS_FAILED, "fake exception 1");
    Throwable suppressingRuntime = new RuntimeException("fake exception 2");
    suppressingTrino.addSuppressed(suppressingRuntime);
    suppressingRuntime.addSuppressed(suppressingTrino);
    ExecutionFailureInfo fromMutualSuppressed = toFailure(suppressingTrino);
    assertEquals(fromMutualSuppressed.getMessage(), "fake exception 1");
    assertNull(fromMutualSuppressed.getCause());
    assertEquals(fromMutualSuppressed.getSuppressed().size(), 1);
    assertEquals(fromMutualSuppressed.getSuppressed().get(0).getMessage(), "fake exception 2");
    assertEquals(fromMutualSuppressed.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // add exception 2 --> add cause (exception 1) --> add cause (exception 2)
    Throwable innerCause = new RuntimeException("fake exception 1");
    Throwable outerCause = new RuntimeException("fake exception 2", innerCause);
    innerCause.initCause(outerCause);
    ExecutionFailureInfo fromMutualCause = toFailure(outerCause);
    assertEquals(fromMutualCause.getMessage(), "fake exception 2");
    assertNotNull(fromMutualCause.getCause());
    assertEquals(fromMutualCause.getCause().getMessage(), "fake exception 1");
    assertEquals(fromMutualCause.getSuppressed().size(), 0);
    assertEquals(fromMutualCause.getErrorCode(), GENERIC_INTERNAL_ERROR.toErrorCode());
}
Also used : TrinoException(io.trino.spi.TrinoException) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) Test(org.testng.annotations.Test)

Example 5 with ExecutionFailureInfo

use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.

the class Query method toQueryError.

/**
 * Converts the failure state of a query into a client-facing {@link QueryError}.
 *
 * <p>The failure details come from the query info when it has them, otherwise from
 * {@code exception}, otherwise from a synthesized "reason unknown" failure. The error code
 * comes from the query info when it has one, otherwise {@code SERIALIZATION_ERROR} if an
 * exception was supplied, otherwise {@code GENERIC_INTERNAL_ERROR}.
 *
 * @param queryInfo query info to read state, failure info and error code from
 * @param exception additional exception to report, if any
 * @return the query error, or {@code null} when the query is not {@code FAILED} and no
 *         exception was supplied
 */
private static QueryError toQueryError(QueryInfo queryInfo, Optional<Throwable> exception) {
    QueryState queryState = queryInfo.getState();
    boolean hasException = exception.isPresent();
    if (!hasException && queryState != FAILED) {
        return null;
    }

    // prefer the failure recorded on the query; fall back only when it is missing
    ExecutionFailureInfo failureDetails = queryInfo.getFailureInfo();
    if (failureDetails == null) {
        if (hasException) {
            failureDetails = toFailure(exception.get());
        } else {
            log.warn("Query %s in state %s has no failure info", queryInfo.getQueryId(), queryState);
            failureDetails = toFailure(new RuntimeException(format("Query is %s (reason unknown)", queryState)));
        }
    }
    FailureInfo clientFailure = failureDetails.toFailureInfo();

    // same precedence for the error code: recorded value first, then fallbacks
    ErrorCode code = queryInfo.getErrorCode();
    if (code == null) {
        if (hasException) {
            code = SERIALIZATION_ERROR.toErrorCode();
        } else {
            code = GENERIC_INTERNAL_ERROR.toErrorCode();
            log.warn("Failed query %s has no error code", queryInfo.getQueryId());
        }
    }

    return new QueryError(
            firstNonNull(clientFailure.getMessage(), "Internal error"),
            null,
            code.getCode(),
            code.getName(),
            code.getType().toString(),
            clientFailure.getErrorLocation(),
            clientFailure);
}
Also used : ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) FailureInfo(io.trino.client.FailureInfo) QueryState(io.trino.execution.QueryState) ErrorCode(io.trino.spi.ErrorCode) QueryError(io.trino.client.QueryError) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo)

Aggregations

ExecutionFailureInfo (io.trino.execution.ExecutionFailureInfo)6 ErrorCode (io.trino.spi.ErrorCode)3 TrinoException (io.trino.spi.TrinoException)3 BasicQueryInfo (io.trino.server.BasicQueryInfo)2 MoreFutures.asVoid (io.airlift.concurrent.MoreFutures.asVoid)1 FailureInfo (io.trino.client.FailureInfo)1 QueryError (io.trino.client.QueryError)1 Failure (io.trino.execution.Failure)1 QueryInfo (io.trino.execution.QueryInfo)1 QueryState (io.trino.execution.QueryState)1 RemoteTask (io.trino.execution.RemoteTask)1 TaskId (io.trino.execution.TaskId)1 TaskState (io.trino.execution.TaskState)1 MemoryRequirements (io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements)1 HostAddress (io.trino.spi.HostAddress)1 StandardErrorCode (io.trino.spi.StandardErrorCode)1 TrinoTransportException (io.trino.spi.TrinoTransportException)1 Objects (java.util.Objects)1 Test (org.testng.annotations.Test)1