use of io.trino.execution.TaskId in project trino by trinodb.
the class TotalReservationOnBlockedNodesLowMemoryKiller method chooseTasksToKill.
/**
 * Selects, for every blocked node, the single task holding the largest memory reservation.
 * <p>
 * A node is treated as blocked when it reports a memory pool and the sum of that pool's
 * free bytes and reserved revocable bytes is not positive. Nodes without a pool are skipped.
 *
 * @param nodes memory information for the nodes to inspect
 * @return a kill target wrapping the selected task ids, or an empty Optional when no task was selected
 */
private Optional<KillTarget> chooseTasksToKill(List<MemoryInfo> nodes) {
    ImmutableSet.Builder<TaskId> victims = ImmutableSet.builder();
    for (MemoryInfo nodeInfo : nodes) {
        MemoryPoolInfo pool = nodeInfo.getPool();
        boolean blocked = pool != null && pool.getFreeBytes() + pool.getReservedRevocableBytes() <= 0;
        if (blocked) {
            // pick the task with the biggest reservation on this node
            Optional<TaskId> heaviestTask = nodeInfo.getTasksMemoryInfo().values().stream()
                    .max(comparing(TaskMemoryInfo::getMemoryReservation))
                    .map(TaskMemoryInfo::getTaskId);
            heaviestTask.ifPresent(victims::add);
        }
    }
    Set<TaskId> selected = victims.build();
    if (!selected.isEmpty()) {
        return Optional.of(KillTarget.selectedTasks(selected));
    }
    return Optional.empty();
}
use of io.trino.execution.TaskId in project trino by trinodb.
the class FaultTolerantStageScheduler method updateTaskStatus.
/**
 * Handles a status update for a task of this stage. Updates whose state is not done are ignored.
 * <p>
 * While holding the lock on {@code this}, the task is removed from {@code runningTasks}, the
 * current {@code taskFinishedFuture} is swapped out (replaced by a fresh future if tasks are
 * still running, otherwise cleared), and the node lease recorded for the task is released.
 * If the task's partition has not finished yet and the scheduler is not closed, the terminal
 * state is then processed:
 * <ul>
 * <li>FINISHED: the partition is marked finished, the sink exchange (if any) is notified, and
 * every remote task registered for the partition is aborted;</li>
 * <li>CANCELED / ABORTED: only logged;</li>
 * <li>FAILED: the partition is re-queued with updated memory requirements when retry budget
 * remains and the error is not a USER_ERROR; otherwise the failure is reported via {@code fail}.</li>
 * </ul>
 * Any throwable raised during processing is passed to {@code fail}.
 *
 * @param taskStatus the status reported for the task
 * @param exchangeSinkInstanceHandle sink instance handle of the task; must be present for a
 *        FINISHED task when a sink exchange is configured
 */
private void updateTaskStatus(TaskStatus taskStatus, Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle) {
    TaskState state = taskStatus.getState();
    if (!state.isDone()) {
        return;
    }
    try {
        RuntimeException failure = null;
        SettableFuture<Void> future;
        synchronized (this) {
            TaskId taskId = taskStatus.getTaskId();
            runningTasks.remove(taskId);
            // capture the current future so it can be completed after the lock is released
            future = taskFinishedFuture;
            if (!runningTasks.isEmpty()) {
                taskFinishedFuture = SettableFuture.create();
            }
            else {
                taskFinishedFuture = null;
            }
            NodeAllocator.NodeLease nodeLease = requireNonNull(runningNodes.remove(taskId), () -> "node not found for task id: " + taskId);
            nodeLease.release();
            int partitionId = taskId.getPartitionId();
            if (!finishedPartitions.contains(partitionId) && !closed) {
                switch (state) {
                    case FINISHED:
                        finishedPartitions.add(partitionId);
                        if (sinkExchange.isPresent()) {
                            checkArgument(exchangeSinkInstanceHandle.isPresent(), "exchangeSinkInstanceHandle is expected to be present");
                            sinkExchange.get().sinkFinished(exchangeSinkInstanceHandle.get());
                        }
                        // the partition is done; abort every remote task registered for it
                        partitionToRemoteTaskMap.get(partitionId).forEach(RemoteTask::abort);
                        break;
                    case CANCELED:
                        log.debug("Task cancelled: %s", taskId);
                        break;
                    case ABORTED:
                        log.debug("Task aborted: %s", taskId);
                        break;
                    case FAILED:
                        // build the generic fallback failure lazily: it is only needed when the task reported no failures
                        ExecutionFailureInfo failureInfo = taskStatus.getFailures().stream()
                                .findFirst()
                                .map(this::rewriteTransportFailure)
                                .orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "A task failed for an unknown reason")));
                        log.warn(failureInfo.toException(), "Task failed: %s", taskId);
                        ErrorCode errorCode = failureInfo.getErrorCode();
                        int taskRemainingAttempts = remainingAttemptsPerTask.getOrDefault(partitionId, maxRetryAttemptsPerTask);
                        // retry only while both the overall and the per-partition budgets last, and never for user errors
                        if (remainingRetryAttemptsOverall > 0 && taskRemainingAttempts > 0 && (errorCode == null || errorCode.getType() != USER_ERROR)) {
                            remainingRetryAttemptsOverall--;
                            remainingAttemptsPerTask.put(partitionId, taskRemainingAttempts - 1);
                            // update memory limits for next attempt
                            MemoryRequirements memoryLimits = partitionMemoryRequirements.get(partitionId);
                            verify(memoryLimits != null);
                            MemoryRequirements newMemoryLimits = partitionMemoryEstimator.getNextRetryMemoryRequirements(session, memoryLimits, errorCode);
                            partitionMemoryRequirements.put(partitionId, newMemoryLimits);
                            // reschedule
                            queuedPartitions.add(partitionId);
                            log.debug("Retrying partition %s for stage %s", partitionId, stage.getStageId());
                        }
                        else {
                            failure = failureInfo.toException();
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected task state: " + state);
                }
            }
        }
        if (failure != null) {
            // must be called outside the lock
            fail(failure);
        }
        // complete the future that was current before this update, unless it is already done
        if (future != null && !future.isDone()) {
            future.set(null);
        }
    }
    catch (Throwable t) {
        fail(t);
    }
}
use of io.trino.execution.TaskId in project trino by trinodb.
the class TestBroadcastOutputBuffer method createBroadcastBuffer.
/**
 * Builds a {@link BroadcastOutputBuffer} for tests and applies the given output buffers to it.
 *
 * @param outputBuffers output buffer descriptor set on the new buffer
 * @param dataSize size limit passed to the buffer constructor
 * @param memoryContext aggregated context from which the buffer obtains its "test" local memory context
 * @param notificationExecutor executor passed to the buffer constructor
 * @return the configured buffer
 */
private BroadcastOutputBuffer createBroadcastBuffer(OutputBuffers outputBuffers, DataSize dataSize, AggregatedMemoryContext memoryContext, Executor notificationExecutor) {
    TaskId taskId = new TaskId(new StageId(new QueryId("query"), 0), 0, 0);
    OutputBufferStateMachine stateMachine = new OutputBufferStateMachine(taskId, stateNotificationExecutor);
    BroadcastOutputBuffer broadcastBuffer = new BroadcastOutputBuffer(
            TASK_INSTANCE_ID,
            stateMachine,
            dataSize,
            () -> memoryContext.newLocalMemoryContext("test"),
            notificationExecutor,
            () -> {});
    broadcastBuffer.setOutputBuffers(outputBuffers);
    return broadcastBuffer;
}
use of io.trino.execution.TaskId in project trino by trinodb.
the class TestDirectExchangeClient method setUpDataCorruption.
/**
 * Creates a {@link DirectExchangeClient} whose HTTP layer corrupts the first response it serves.
 * <p>
 * The backing mock processor holds two pages for {@code location} and is marked complete.
 * The first request is answered with the real response after incrementing the byte at index 42;
 * the second request replays an unmodified copy of that first response; every later request is
 * forwarded to the mock processor untouched.
 *
 * @param dataIntegrityVerification verification mode passed to the client
 * @param location location registered with the client and served by the mock processor
 * @return a client with the single location added and no more locations expected
 */
private DirectExchangeClient setUpDataCorruption(DataIntegrityVerification dataIntegrityVerification, URI location) {
    DataSize maxResponseSize = DataSize.of(10, Unit.MEGABYTE);
    MockExchangeRequestProcessor delegate = new MockExchangeRequestProcessor(maxResponseSize);
    delegate.addPage(location, createPage(1));
    delegate.addPage(location, createPage(2));
    delegate.setComplete(location);

    TestingHttpClient.Processor corruptingProcessor = new TestingHttpClient.Processor() {
        private int requestsServed;
        private TestingResponse intactCopy;

        @Override
        public synchronized Response handle(Request request) throws Exception {
            switch (requestsServed) {
                case 0:
                    return serveCorrupted(request);
                case 1:
                    return replayIntact();
                default:
                    requestsServed++;
                    return delegate.handle(request);
            }
        }

        // First request: keep an intact copy of the real response, then return it with one byte altered.
        private Response serveCorrupted(Request request) throws Exception {
            verify(intactCopy == null);
            TestingResponse genuine = (TestingResponse) delegate.handle(request);
            checkState(genuine.getStatusCode() == HttpStatus.OK.code(), "Unexpected status code: %s", genuine.getStatusCode());
            ListMultimap<String, String> headers = genuine.getHeaders().entries().stream()
                    .collect(toImmutableListMultimap(entry -> entry.getKey().toString(), Map.Entry::getValue));
            byte[] payload = toByteArray(genuine.getInputStream());
            checkState(payload.length > 42, "too short");
            intactCopy = new TestingResponse(HttpStatus.OK, headers, payload.clone());
            // corrupt
            payload[42]++;
            requestsServed++;
            return new TestingResponse(HttpStatus.OK, headers, payload);
        }

        // Second request: hand back the saved intact copy exactly once.
        private Response replayIntact() {
            verify(intactCopy != null);
            Response replay = intactCopy;
            intactCopy = null;
            requestsServed++;
            return replay;
        }
    };

    StreamingDirectExchangeBuffer exchangeBuffer = new StreamingDirectExchangeBuffer(scheduler, DataSize.of(32, Unit.MEGABYTE));
    Duration maxErrorDuration = new Duration(1, TimeUnit.MINUTES);
    TestingHttpClient httpClient = new TestingHttpClient(corruptingProcessor, scheduler);
    DirectExchangeClient client = new DirectExchangeClient(
            "localhost",
            dataIntegrityVerification,
            exchangeBuffer,
            maxResponseSize,
            1,
            maxErrorDuration,
            true,
            httpClient,
            scheduler,
            new SimpleLocalMemoryContext(newSimpleAggregatedMemoryContext(), "test"),
            pageBufferClientCallbackExecutor,
            (taskId, failure) -> {});
    client.addLocation(new TaskId(new StageId("query", 1), 0, 0), location);
    client.noMoreLocations();
    return client;
}
use of io.trino.execution.TaskId in project trino by trinodb.
the class TestDirectExchangeClient method testStreamingTaskFailure.
/**
 * Registers two task locations with a streaming exchange client, reads the single page served
 * by the first location, marks that location complete, then fails the second location and
 * expects the next read to throw while the client still reports not finished.
 */
@Test
public void testStreamingTaskFailure() {
    DataSize responseLimit = DataSize.of(10, Unit.MEGABYTE);
    MockExchangeRequestProcessor requestProcessor = new MockExchangeRequestProcessor(responseLimit);

    StageId stage = new StageId("query", 1);
    TaskId healthyTask = new TaskId(stage, 0, 0);
    TaskId failingTask = new TaskId(stage, 1, 0);
    URI healthyLocation = URI.create("http://localhost:8080/1");
    URI failingLocation = URI.create("http://localhost:8080/2");

    requestProcessor.addPage(healthyLocation, createPage(1));

    StreamingDirectExchangeBuffer streamingBuffer = new StreamingDirectExchangeBuffer(scheduler, DataSize.of(1, Unit.MEGABYTE));
    Duration maxErrorDuration = new Duration(1, SECONDS);
    TestingHttpClient httpClient = new TestingHttpClient(requestProcessor, scheduler);
    DirectExchangeClient client = new DirectExchangeClient(
            "localhost",
            DataIntegrityVerification.ABORT,
            streamingBuffer,
            responseLimit,
            1,
            maxErrorDuration,
            true,
            httpClient,
            scheduler,
            new SimpleLocalMemoryContext(newSimpleAggregatedMemoryContext(), "test"),
            pageBufferClientCallbackExecutor,
            (taskId, failure) -> {});

    client.addLocation(healthyTask, healthyLocation);
    client.addLocation(failingTask, failingLocation);

    assertPageEquals(getNextPage(client), createPage(1));

    requestProcessor.setComplete(healthyLocation);
    // the second location has produced nothing yet, so the blocked future must not resolve
    boolean unblocked = tryGetFutureValue(client.isBlocked(), 10, MILLISECONDS).isPresent();
    assertFalse(unblocked);

    RuntimeException injectedFailure = new RuntimeException("randomfailure");
    requestProcessor.setFailed(failingLocation, injectedFailure);

    assertThatThrownBy(() -> getNextPage(client))
            .hasMessageContaining("Encountered too many errors talking to a worker node");
    assertFalse(client.isFinished());
}
Aggregations