use of io.trino.spi.ErrorCode in project trino by trinodb.
the class QueryStateMachine method getBasicQueryInfo.
/**
 * Builds a {@link BasicQueryInfo} snapshot for this query.
 *
 * @param rootStage basic stats of the root stage; {@code EMPTY_STAGE_STATS} is substituted when absent
 * @return basic query info built from the query state captured at the start of the call
 */
public BasicQueryInfo getBasicQueryInfo(Optional<BasicStageStats> rootStage) {
    // The query state has to be read before anything else so the result is a
    // consistent view of the query: while this information is being built the
    // query could finish, and the task states would never be visible.
    QueryState currentState = queryState.get();

    // An error code is only reported for a FAILED query that has a recorded failure cause.
    ErrorCode failureErrorCode = null;
    if (currentState == FAILED) {
        ExecutionFailureInfo failure = this.failureCause.get();
        failureErrorCode = (failure == null) ? null : failure.getErrorCode();
    }

    BasicStageStats rootStats = rootStage.orElse(EMPTY_STAGE_STATS);

    BasicQueryStats basicStats = new BasicQueryStats(
            queryStateTimer.getCreateTime(),
            getEndTime().orElse(null),
            queryStateTimer.getQueuedTime(),
            queryStateTimer.getElapsedTime(),
            queryStateTimer.getExecutionTime(),
            rootStats.getTotalDrivers(),
            rootStats.getQueuedDrivers(),
            rootStats.getRunningDrivers(),
            rootStats.getCompletedDrivers(),
            rootStats.getRawInputDataSize(),
            rootStats.getRawInputPositions(),
            rootStats.getPhysicalInputDataSize(),
            rootStats.getCumulativeUserMemory(),
            rootStats.getFailedCumulativeUserMemory(),
            rootStats.getUserMemoryReservation(),
            rootStats.getTotalMemoryReservation(),
            succinctBytes(getPeakUserMemoryInBytes()),
            succinctBytes(getPeakTotalMemoryInBytes()),
            rootStats.getTotalCpuTime(),
            rootStats.getFailedCpuTime(),
            rootStats.getTotalScheduledTime(),
            rootStats.getFailedScheduledTime(),
            rootStats.isFullyBlocked(),
            rootStats.getBlockedReasons(),
            rootStats.getProgressPercentage());

    return new BasicQueryInfo(
            queryId,
            session.toSessionRepresentation(),
            Optional.of(resourceGroup),
            currentState,
            rootStats.isScheduled(),
            self,
            query,
            Optional.ofNullable(updateType.get()),
            preparedQuery,
            basicStats,
            failureErrorCode == null ? null : failureErrorCode.getType(),
            failureErrorCode,
            queryType);
}
use of io.trino.spi.ErrorCode in project trino by trinodb.
the class QueryStateMachine method getQueryInfo.
/**
 * Builds the full {@link QueryInfo} for this query.
 *
 * @param rootStage info of the root stage, if one exists; passed through to the result
 *        and used to derive the stats, scheduling flag and completeness flag
 * @return query info built from the query state captured at the start of the call
 */
@VisibleForTesting
QueryInfo getQueryInfo(Optional<StageInfo> rootStage) {
    // The query state has to be read before anything else so the result is a
    // consistent view of the query: while this information is being built the
    // query could finish, and the task states would never be visible.
    QueryState currentState = queryState.get();

    // Failure details are only reported when the captured state is FAILED.
    ExecutionFailureInfo failure = null;
    ErrorCode failureErrorCode = null;
    if (currentState == FAILED) {
        failure = this.failureCause.get();
        failureErrorCode = (failure == null) ? null : failure.getErrorCode();
    }

    // The info is complete only when every stage reports complete info.
    boolean allStagesComplete = getAllStages(rootStage).stream()
            .allMatch(stageInfo -> stageInfo.isCompleteInfo());
    boolean scheduled = isScheduled(rootStage);

    return new QueryInfo(
            queryId,
            session.toSessionRepresentation(),
            currentState,
            scheduled,
            self,
            outputManager.getQueryOutputInfo()
                    .map(QueryOutputInfo::getColumnNames)
                    .orElse(ImmutableList.of()),
            query,
            preparedQuery,
            getQueryStats(rootStage),
            Optional.ofNullable(setCatalog.get()),
            Optional.ofNullable(setSchema.get()),
            Optional.ofNullable(setPath.get()),
            setSessionProperties,
            resetSessionProperties,
            setRoles,
            addedPreparedStatements,
            deallocatedPreparedStatements,
            Optional.ofNullable(startedTransactionId.get()),
            clearTransactionId.get(),
            updateType.get(),
            rootStage,
            failure,
            failureErrorCode,
            warningCollector.getWarnings(),
            inputs.get(),
            output.get(),
            referencedTables.get(),
            routines.get(),
            allStagesComplete,
            Optional.of(resourceGroup),
            queryType);
}
use of io.trino.spi.ErrorCode in project trino by trinodb.
the class FaultTolerantStageScheduler method updateTaskStatus.
/**
 * Reacts to a task status update once the task has reached a done state.
 * Updates for tasks that are not done are ignored.
 *
 * <p>While holding the lock on {@code this}, the task is removed from the running
 * set, its node lease is released, and the "task finished" future is swapped out.
 * Unless the partition is already finished or the scheduler is closed, the
 * terminal state is then handled:
 * <ul>
 * <li>FINISHED: the partition is marked finished, the sink exchange (if any) is
 * notified, and the remote tasks registered for the partition are aborted;</li>
 * <li>CANCELED / ABORTED: only logged;</li>
 * <li>FAILED: the partition is re-queued when retry budget remains and the error is
 * not a {@code USER_ERROR}; otherwise the failure is propagated via {@code fail}.</li>
 * </ul>
 *
 * @param taskStatus the latest status reported for the task
 * @param exchangeSinkInstanceHandle sink instance handle of the task; must be present
 *        when the task FINISHED and this stage has a sink exchange
 */
private void updateTaskStatus(TaskStatus taskStatus, Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle) {
    TaskState state = taskStatus.getState();
    if (!state.isDone()) {
        return;
    }
    try {
        RuntimeException failure = null;
        SettableFuture<Void> future;
        synchronized (this) {
            TaskId taskId = taskStatus.getTaskId();
            runningTasks.remove(taskId);
            // Remember the current future so it can be completed after the lock is
            // released; install a fresh one only if other tasks are still running.
            future = taskFinishedFuture;
            if (!runningTasks.isEmpty()) {
                taskFinishedFuture = SettableFuture.create();
            } else {
                taskFinishedFuture = null;
            }
            NodeAllocator.NodeLease nodeLease = requireNonNull(runningNodes.remove(taskId), () -> "node not found for task id: " + taskId);
            nodeLease.release();
            int partitionId = taskId.getPartitionId();
            if (!finishedPartitions.contains(partitionId) && !closed) {
                switch (state) {
                    case FINISHED:
                        finishedPartitions.add(partitionId);
                        if (sinkExchange.isPresent()) {
                            checkArgument(exchangeSinkInstanceHandle.isPresent(), "exchangeSinkInstanceHandle is expected to be present");
                            sinkExchange.get().sinkFinished(exchangeSinkInstanceHandle.get());
                        }
                        // The partition is done; abort every remote task registered for it.
                        partitionToRemoteTaskMap.get(partitionId).forEach(RemoteTask::abort);
                        break;
                    case CANCELED:
                        log.debug("Task cancelled: %s", taskId);
                        break;
                    case ABORTED:
                        log.debug("Task aborted: %s", taskId);
                        break;
                    case FAILED:
                        // Build the fallback failure lazily: constructing the exception (and its
                        // stack trace) is only needed when the task reported no failure at all.
                        ExecutionFailureInfo failureInfo = taskStatus.getFailures().stream()
                                .findFirst()
                                .map(this::rewriteTransportFailure)
                                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "A task failed for an unknown reason")));
                        log.warn(failureInfo.toException(), "Task failed: %s", taskId);
                        ErrorCode errorCode = failureInfo.getErrorCode();
                        int taskRemainingAttempts = remainingAttemptsPerTask.getOrDefault(partitionId, maxRetryAttemptsPerTask);
                        // Retry only while both the overall and the per-task budgets allow it,
                        // and never for user errors.
                        if (remainingRetryAttemptsOverall > 0 && taskRemainingAttempts > 0 && (errorCode == null || errorCode.getType() != USER_ERROR)) {
                            remainingRetryAttemptsOverall--;
                            remainingAttemptsPerTask.put(partitionId, taskRemainingAttempts - 1);
                            // update memory limits for next attempt
                            MemoryRequirements memoryLimits = partitionMemoryRequirements.get(partitionId);
                            verify(memoryLimits != null);
                            MemoryRequirements newMemoryLimits = partitionMemoryEstimator.getNextRetryMemoryRequirements(session, memoryLimits, errorCode);
                            partitionMemoryRequirements.put(partitionId, newMemoryLimits);
                            // reschedule
                            queuedPartitions.add(partitionId);
                            log.debug("Retrying partition %s for stage %s", partitionId, stage.getStageId());
                        } else {
                            failure = failureInfo.toException();
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected task state: " + state);
                }
            }
        }
        if (failure != null) {
            // must be called outside the lock
            fail(failure);
        }
        // Complete the future captured under the lock (if any) now that the lock is released.
        if (future != null && !future.isDone()) {
            future.set(null);
        }
    } catch (Throwable t) {
        // Any unexpected error while processing the update is routed to fail(...).
        fail(t);
    }
}
use of io.trino.spi.ErrorCode in project trino by trinodb.
the class SqlQueryScheduler method createDistributedStagesScheduler.
/**
 * Creates the distributed stages scheduler for the given attempt, publishes it in
 * {@code this.distributedStagesScheduler}, and registers a state-change listener on it
 * that drives the query state machine and the retry logic.
 *
 * @param attempt attempt number; must be 0 unless the retry policy is {@code QUERY}
 * @return the created scheduler, or empty when the query is already done
 */
private synchronized Optional<DistributedStagesScheduler> createDistributedStagesScheduler(int attempt) {
    verify(attempt == 0 || retryPolicy == RetryPolicy.QUERY, "unexpected attempt %s for retry policy %s", attempt, retryPolicy);

    if (queryStateMachine.isDone()) {
        return Optional.empty();
    }

    if (attempt > 0 && retryPolicy == RetryPolicy.QUERY) {
        dynamicFilterService.registerQueryRetry(queryStateMachine.getQueryId(), attempt);
    }

    // Task-level retries use the fault-tolerant scheduler; everything else is pipelined.
    DistributedStagesScheduler scheduler;
    switch (retryPolicy) {
        case TASK:
            ExchangeManager exchangeManager = exchangeManagerRegistry.getExchangeManager();
            scheduler = FaultTolerantDistributedStagesScheduler.create(
                    queryStateMachine,
                    stageManager,
                    failureDetector,
                    taskSourceFactory,
                    taskDescriptorStorage,
                    exchangeManager,
                    nodePartitioningManager,
                    coordinatorStagesScheduler.getTaskLifecycleListener(),
                    maxTaskRetryAttemptsOverall,
                    maxTaskRetryAttemptsPerTask,
                    schedulerExecutor,
                    schedulerStats,
                    nodeAllocatorService,
                    partitionMemoryEstimator);
            break;
        case QUERY:
        case NONE:
            scheduler = PipelinedDistributedStagesScheduler.create(
                    queryStateMachine,
                    schedulerStats,
                    nodeScheduler,
                    nodePartitioningManager,
                    stageManager,
                    coordinatorStagesScheduler,
                    executionPolicy,
                    failureDetector,
                    schedulerExecutor,
                    splitSourceFactory,
                    splitBatchSize,
                    dynamicFilterService,
                    tableExecuteContextManager,
                    retryPolicy,
                    attempt);
            break;
        default:
            throw new IllegalArgumentException("Unexpected retry policy: " + retryPolicy);
    }

    this.distributedStagesScheduler.set(scheduler);

    scheduler.addStateChangeListener(newState -> {
        // Move the query out of STARTING as soon as the scheduler runs or completes.
        if (queryStateMachine.getQueryState() == QueryState.STARTING
                && (newState == DistributedStagesSchedulerState.RUNNING || newState.isDone())) {
            queryStateMachine.transitionToRunning();
        }

        // On a non-failure completion, finish every distributed stage.
        if (newState.isDone() && !newState.isFailure()) {
            stageManager.getDistributedStagesInTopologicalOrder()
                    .forEach(stage -> stageManager.get(stage.getStageId()).finish());
        }

        boolean noCoordinatorStages = stageManager.getCoordinatorStagesInTopologicalOrder().isEmpty();
        if (noCoordinatorStages) {
            // otherwise defer query transitioning to the coordinator stages
            if (newState == DistributedStagesSchedulerState.FINISHED) {
                queryStateMachine.transitionToFinishing();
            } else if (newState == DistributedStagesSchedulerState.CANCELED) {
                // output stage was canceled
                queryStateMachine.transitionToCanceled();
            }
        }

        if (newState != DistributedStagesSchedulerState.FAILED) {
            return;
        }

        StageFailureInfo stageFailureInfo = scheduler.getFailureCause()
                .orElseGet(() -> new StageFailureInfo(
                        toFailure(new VerifyException("distributedStagesScheduler failed but failure cause is not present")),
                        Optional.empty()));
        ErrorCode errorCode = stageFailureInfo.getFailureInfo().getErrorCode();

        if (shouldRetry(errorCode)) {
            // Exponential backoff: initial delay doubled per attempt, capped at the maximum delay.
            long initialDelayMillis = retryInitialDelay.toMillis();
            long backoffFactor = (long) pow(2, currentAttempt.get());
            long delayInMillis = min(initialDelayMillis * backoffFactor, retryMaxDelay.toMillis());
            currentAttempt.incrementAndGet();
            scheduleRetryWithDelay(delayInMillis);
            return;
        }

        // No retry: fail the stage that caused the failure, abort all the others,
        // and then fail the query itself.
        stageManager.getDistributedStagesInTopologicalOrder().forEach(stage -> {
            boolean isFailedStage = stageFailureInfo.getFailedStageId().isPresent()
                    && stageFailureInfo.getFailedStageId().get().equals(stage.getStageId());
            if (isFailedStage) {
                stage.fail(stageFailureInfo.getFailureInfo().toException());
            } else {
                stage.abort();
            }
        });
        queryStateMachine.transitionToFailed(stageFailureInfo.getFailureInfo().toException());
    });

    return Optional.of(scheduler);
}
use of io.trino.spi.ErrorCode in project trino by trinodb.
the class TestQueryStateInfoResource method setUp.
/**
 * Starts a testing server with the TPCH catalog, submits {@code LONG_LASTING_QUERY}
 * as two different users, and then polls {@code /v1/query} until both queries are
 * reported as RUNNING.
 *
 * <p>Setup fails if any of the two queries is reported as FAILED, or if both queries
 * are not RUNNING within 5 minutes.
 */
@BeforeClass
public void setUp() {
    server = TestingTrinoServer.create();
    server.installPlugin(new TpchPlugin());
    server.createCatalog("tpch", "tpch");
    client = new JettyHttpClient();

    // Submit the first query as user1 and follow its next URI once.
    Request request1 = preparePost()
            .setUri(uriBuilderFrom(server.getBaseUrl()).replacePath("/v1/statement").build())
            .setBodyGenerator(createStaticBodyGenerator(LONG_LASTING_QUERY, UTF_8))
            .setHeader(TRINO_HEADERS.requestUser(), "user1")
            .build();
    queryResults = client.execute(request1, createJsonResponseHandler(QUERY_RESULTS_JSON_CODEC));
    client.execute(prepareGet().setUri(queryResults.getNextUri()).build(), createJsonResponseHandler(QUERY_RESULTS_JSON_CODEC));

    // Submit the same query as user2 and follow its next URI once.
    Request request2 = preparePost()
            .setUri(uriBuilderFrom(server.getBaseUrl()).replacePath("/v1/statement").build())
            .setBodyGenerator(createStaticBodyGenerator(LONG_LASTING_QUERY, UTF_8))
            .setHeader(TRINO_HEADERS.requestUser(), "user2")
            .build();
    QueryResults queryResults2 = client.execute(request2, createJsonResponseHandler(jsonCodec(QueryResults.class)));
    client.execute(prepareGet().setUri(queryResults2.getNextUri()).build(), createJsonResponseHandler(QUERY_RESULTS_JSON_CODEC));

    // queries are started in the background, so they may not all be immediately visible
    long start = System.nanoTime();
    while (Duration.nanosSince(start).compareTo(new Duration(5, MINUTES)) < 0) {
        List<BasicQueryInfo> queryInfos = client.execute(
                prepareGet()
                        .setUri(uriBuilderFrom(server.getBaseUrl()).replacePath("/v1/query").build())
                        .setHeader(TRINO_HEADERS.requestUser(), "unknown")
                        .build(),
                createJsonResponseHandler(listJsonCodec(BasicQueryInfo.class)));
        if (queryInfos.size() == 2) {
            if (queryInfos.stream().allMatch(info -> info.getState() == RUNNING)) {
                // Both queries are running; setup is complete.
                return;
            }
            List<ErrorCode> errorCodes = queryInfos.stream()
                    .filter(info -> info.getState() == FAILED)
                    .map(BasicQueryInfo::getErrorCode)
                    .collect(toImmutableList());
            if (!errorCodes.isEmpty()) {
                fail("setup queries failed with: " + errorCodes);
            }
        }
    }
    // Do not let the tests run against queries in an unknown state: a timeout here
    // previously fell through silently and surfaced later as unrelated assertion failures.
    fail("setup queries did not reach the RUNNING state within 5 minutes");
}
Aggregations