Use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.
Class FaultTolerantStageScheduler, method updateTaskStatus.
/**
 * Processes a status update for a single task.
 *
 * <p>Updates whose state is not yet done are ignored. For a done task, the task is removed from
 * {@code runningTasks}, its node lease is released, and (unless the partition is already finished
 * or this scheduler is closed) the final state is handled:
 * <ul>
 * <li>{@code FINISHED}: the partition is recorded as finished, the sink exchange (if present) is
 * notified, and the remote tasks registered for the partition are aborted;</li>
 * <li>{@code CANCELED} / {@code ABORTED}: only logged;</li>
 * <li>{@code FAILED}: the partition is re-queued when retry budget remains and the error is not a
 * {@code USER_ERROR}; otherwise {@code fail(...)} is invoked with the failure.</li>
 * </ul>
 * Any throwable raised while processing is passed to {@code fail(...)}.
 *
 * @param taskStatus the latest status reported for the task
 * @param exchangeSinkInstanceHandle sink instance handle of the task; must be present when the task
 *         is {@code FINISHED} and a sink exchange is configured
 */
private void updateTaskStatus(TaskStatus taskStatus, Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle) {
    TaskState state = taskStatus.getState();
    if (!state.isDone()) {
        return;
    }
    try {
        RuntimeException failure = null;
        SettableFuture<Void> future;
        synchronized (this) {
            TaskId taskId = taskStatus.getTaskId();
            runningTasks.remove(taskId);
            // Swap the "task finished" future while holding the lock; the previous one is completed below.
            future = taskFinishedFuture;
            if (!runningTasks.isEmpty()) {
                taskFinishedFuture = SettableFuture.create();
            } else {
                taskFinishedFuture = null;
            }
            NodeAllocator.NodeLease nodeLease = requireNonNull(runningNodes.remove(taskId), () -> "node not found for task id: " + taskId);
            nodeLease.release();
            int partitionId = taskId.getPartitionId();
            if (!finishedPartitions.contains(partitionId) && !closed) {
                switch (state) {
                    case FINISHED:
                        finishedPartitions.add(partitionId);
                        if (sinkExchange.isPresent()) {
                            checkArgument(exchangeSinkInstanceHandle.isPresent(), "exchangeSinkInstanceHandle is expected to be present");
                            sinkExchange.get().sinkFinished(exchangeSinkInstanceHandle.get());
                        }
                        // abort every remote task registered for the now-finished partition
                        partitionToRemoteTaskMap.get(partitionId).forEach(RemoteTask::abort);
                        break;
                    case CANCELED:
                        log.debug("Task cancelled: %s", taskId);
                        break;
                    case ABORTED:
                        log.debug("Task aborted: %s", taskId);
                        break;
                    case FAILED:
                        // Build the fallback lazily: constructing the exception is only needed
                        // when the task reported no failure at all.
                        ExecutionFailureInfo failureInfo = taskStatus.getFailures().stream()
                                .findFirst()
                                .map(this::rewriteTransportFailure)
                                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "A task failed for an unknown reason")));
                        log.warn(failureInfo.toException(), "Task failed: %s", taskId);
                        ErrorCode errorCode = failureInfo.getErrorCode();
                        int taskRemainingAttempts = remainingAttemptsPerTask.getOrDefault(partitionId, maxRetryAttemptsPerTask);
                        // retry only while both the overall and the per-task budgets allow it, and never for user errors
                        if (remainingRetryAttemptsOverall > 0 && taskRemainingAttempts > 0 && (errorCode == null || errorCode.getType() != USER_ERROR)) {
                            remainingRetryAttemptsOverall--;
                            remainingAttemptsPerTask.put(partitionId, taskRemainingAttempts - 1);
                            // update memory limits for next attempt
                            MemoryRequirements memoryLimits = partitionMemoryRequirements.get(partitionId);
                            verify(memoryLimits != null);
                            MemoryRequirements newMemoryLimits = partitionMemoryEstimator.getNextRetryMemoryRequirements(session, memoryLimits, errorCode);
                            partitionMemoryRequirements.put(partitionId, newMemoryLimits);
                            // reschedule
                            queuedPartitions.add(partitionId);
                            log.debug("Retrying partition %s for stage %s", partitionId, stage.getStageId());
                        } else {
                            failure = failureInfo.toException();
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected task state: " + state);
                }
            }
        }
        if (failure != null) {
            // must be called outside the lock
            fail(failure);
        }
        if (future != null && !future.isDone()) {
            future.set(null);
        }
    } catch (Throwable t) {
        fail(t);
    }
}
Use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.
Class FailedDispatchQuery, method immediateFailureQueryInfo.
/**
 * Creates a {@link QueryInfo} in the {@code FAILED} state for a query that failed with the given
 * throwable. The failure info and error code are both derived from {@code toFailure(throwable)}.
 *
 * @param session session supplying the query id and session representation
 * @param query the query text
 * @param preparedQuery the prepared query text, if any
 * @param self URI passed through to the resulting {@link QueryInfo}
 * @param resourceGroupId resource group passed through to the resulting {@link QueryInfo}, if any
 * @param throwable the cause of the failure
 * @return the failed query info
 */
private static QueryInfo immediateFailureQueryInfo(Session session, String query, Optional<String> preparedQuery, URI self, Optional<ResourceGroupId> resourceGroupId, Throwable throwable) {
    ExecutionFailureInfo failure = toFailure(throwable);
    return new QueryInfo(
            session.getQueryId(),
            session.toSessionRepresentation(),
            QueryState.FAILED,
            false,
            self,
            ImmutableList.of(),
            query,
            preparedQuery,
            immediateFailureQueryStats(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            ImmutableMap.of(),
            ImmutableSet.of(),
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableSet.of(),
            Optional.empty(),
            false,
            null,
            Optional.empty(),
            failure,
            failure.getErrorCode(),
            ImmutableList.of(),
            ImmutableSet.of(),
            Optional.empty(),
            ImmutableList.of(),
            ImmutableList.of(),
            true,
            resourceGroupId,
            Optional.empty());
}
Use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.
Class LocalDispatchQuery, method getDispatchInfo.
/**
 * Reports the dispatch status of this query: failed, dispatched, or still queued.
 *
 * @return {@code DispatchInfo.failed(...)} when the query state is {@code FAILED};
 *         otherwise {@code DispatchInfo.dispatched(...)} if the submitted future had completed,
 *         else {@code DispatchInfo.queued(...)}
 */
@Override
public DispatchInfo getDispatchInfo() {
    // Sample the submitted future BEFORE reading the query state, so that a failed
    // query state is visible by the time we look at it.
    boolean wasSubmitted = submitted.isDone();
    BasicQueryInfo basicInfo = stateMachine.getBasicQueryInfo(Optional.empty());

    if (basicInfo.getState() == QueryState.FAILED) {
        // fall back to a generic internal error when the state machine holds no failure info
        ExecutionFailureInfo failure = stateMachine.getFailureInfo()
                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "Query failed for an unknown reason")));
        return DispatchInfo.failed(
                failure,
                basicInfo.getQueryStats().getElapsedTime(),
                basicInfo.getQueryStats().getQueuedTime());
    }

    if (!wasSubmitted) {
        return DispatchInfo.queued(
                basicInfo.getQueryStats().getElapsedTime(),
                basicInfo.getQueryStats().getQueuedTime());
    }

    return DispatchInfo.dispatched(
            new LocalCoordinatorLocation(),
            basicInfo.getQueryStats().getElapsedTime(),
            basicInfo.getQueryStats().getQueuedTime());
}
Use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.
Class TestFailures, method testToFailureLoop.
/**
 * Checks the result of {@code toFailure} for throwables whose cause / suppressed links
 * form a cycle, covering four different loop shapes.
 */
@Test
public void testToFailureLoop() {
    // Shapes 1 and 2 share one graph: exception 2 has exception 1 as its cause,
    // and exception 1 carries exception 2 as a suppressed exception.
    Throwable trinoRoot = new TrinoException(TOO_MANY_REQUESTS_FAILED, "fake exception 1");
    Throwable runtimeWrapper = new RuntimeException("fake exception 2", trinoRoot);
    trinoRoot.addSuppressed(runtimeWrapper);

    // Shape 1: exception 1 --> suppressed (exception 2) --> cause (exception 1)
    ExecutionFailureInfo fromRoot = toFailure(trinoRoot);
    assertEquals(fromRoot.getMessage(), "fake exception 1");
    assertNull(fromRoot.getCause());
    assertEquals(fromRoot.getSuppressed().size(), 1);
    assertEquals(fromRoot.getSuppressed().get(0).getMessage(), "fake exception 2");
    assertEquals(fromRoot.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // Shape 2: exception 2 --> cause (exception 1) --> suppressed (exception 2)
    ExecutionFailureInfo fromWrapper = toFailure(runtimeWrapper);
    assertEquals(fromWrapper.getMessage(), "fake exception 2");
    assertNotNull(fromWrapper.getCause());
    assertEquals(fromWrapper.getCause().getMessage(), "fake exception 1");
    assertEquals(fromWrapper.getSuppressed().size(), 0);
    assertEquals(fromWrapper.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // Shape 3: exception 1 --> suppressed (exception 2) --> suppressed (exception 1)
    Throwable mutualFirst = new TrinoException(TOO_MANY_REQUESTS_FAILED, "fake exception 1");
    Throwable mutualSecond = new RuntimeException("fake exception 2");
    mutualFirst.addSuppressed(mutualSecond);
    mutualSecond.addSuppressed(mutualFirst);

    ExecutionFailureInfo fromMutualSuppression = toFailure(mutualFirst);
    assertEquals(fromMutualSuppression.getMessage(), "fake exception 1");
    assertNull(fromMutualSuppression.getCause());
    assertEquals(fromMutualSuppression.getSuppressed().size(), 1);
    assertEquals(fromMutualSuppression.getSuppressed().get(0).getMessage(), "fake exception 2");
    assertEquals(fromMutualSuppression.getErrorCode(), TOO_MANY_REQUESTS_FAILED.toErrorCode());

    // Shape 4: exception 2 --> cause (exception 1) --> cause (exception 2)
    Throwable cyclicInner = new RuntimeException("fake exception 1");
    Throwable cyclicOuter = new RuntimeException("fake exception 2", cyclicInner);
    cyclicInner.initCause(cyclicOuter);

    ExecutionFailureInfo fromCauseCycle = toFailure(cyclicOuter);
    assertEquals(fromCauseCycle.getMessage(), "fake exception 2");
    assertNotNull(fromCauseCycle.getCause());
    assertEquals(fromCauseCycle.getCause().getMessage(), "fake exception 1");
    assertEquals(fromCauseCycle.getSuppressed().size(), 0);
    assertEquals(fromCauseCycle.getErrorCode(), GENERIC_INTERNAL_ERROR.toErrorCode());
}
Use of io.trino.execution.ExecutionFailureInfo in project trino by trinodb.
Class Query, method toQueryError.
/**
 * Builds a {@link QueryError} from the query's failure info and error code, falling back to the
 * supplied exception, and then to generic defaults, when the query info lacks them.
 *
 * @param queryInfo the query whose failure is being reported
 * @param exception an additional exception to report, if any
 * @return the query error, or {@code null} when the query is not in the {@code FAILED} state and
 *         no exception was supplied
 */
private static QueryError toQueryError(QueryInfo queryInfo, Optional<Throwable> exception) {
    QueryState state = queryInfo.getState();
    boolean hasException = exception.isPresent();
    if (!hasException && state != FAILED) {
        return null;
    }

    // Failure details: prefer what the query recorded, then the supplied exception.
    ExecutionFailureInfo executionFailure = queryInfo.getFailureInfo();
    if (executionFailure == null) {
        if (hasException) {
            executionFailure = toFailure(exception.get());
        } else {
            log.warn("Query %s in state %s has no failure info", queryInfo.getQueryId(), state);
            executionFailure = toFailure(new RuntimeException(format("Query is %s (reason unknown)", state)));
        }
    }
    FailureInfo failure = executionFailure.toFailureInfo();

    // Error code: prefer the query's own code; a supplied exception maps to SERIALIZATION_ERROR.
    ErrorCode errorCode = queryInfo.getErrorCode();
    if (errorCode == null) {
        if (hasException) {
            errorCode = SERIALIZATION_ERROR.toErrorCode();
        } else {
            errorCode = GENERIC_INTERNAL_ERROR.toErrorCode();
            log.warn("Failed query %s has no error code", queryInfo.getQueryId());
        }
    }

    String message = firstNonNull(failure.getMessage(), "Internal error");
    return new QueryError(
            message,
            null,
            errorCode.getCode(),
            errorCode.getName(),
            errorCode.getType().toString(),
            failure.getErrorLocation(),
            failure);
}
Aggregations