Search in sources :

Example 1 with TaskId

use of io.trino.execution.TaskId in project trino by trinodb.

the class TotalReservationOnBlockedNodesLowMemoryKiller method chooseTasksToKill.

/**
 * Picks at most one task per blocked node as a kill candidate.
 *
 * <p>A node is considered here only when it reports a memory pool and the sum of that
 * pool's free bytes and reserved revocable bytes is not positive. For each such node the
 * task with the largest memory reservation is selected.
 *
 * @param nodes memory information for the nodes to inspect
 * @return a kill target covering the selected tasks, or empty when no task was selected
 */
private Optional<KillTarget> chooseTasksToKill(List<MemoryInfo> nodes) {
    ImmutableSet.Builder<TaskId> victims = ImmutableSet.builder();
    for (MemoryInfo candidate : nodes) {
        MemoryPoolInfo pool = candidate.getPool();
        // Nodes without a pool, or with any free/revocable memory left, are skipped.
        boolean blocked = pool != null && pool.getFreeBytes() + pool.getReservedRevocableBytes() <= 0;
        if (blocked) {
            Optional<TaskId> largest = candidate.getTasksMemoryInfo().values().stream()
                    .max(comparing(TaskMemoryInfo::getMemoryReservation))
                    .map(TaskMemoryInfo::getTaskId);
            largest.ifPresent(victims::add);
        }
    }
    Set<TaskId> selected = victims.build();
    return selected.isEmpty() ? Optional.empty() : Optional.of(KillTarget.selectedTasks(selected));
}
Also used : TaskMemoryInfo(io.trino.TaskMemoryInfo) TaskId(io.trino.execution.TaskId) ImmutableSet(com.google.common.collect.ImmutableSet) MemoryPoolInfo(io.trino.spi.memory.MemoryPoolInfo)

Example 2 with TaskId

use of io.trino.execution.TaskId in project trino by trinodb.

the class FaultTolerantStageScheduler method updateTaskStatus.

/**
 * Processes a task status update; statuses whose state is not done are ignored.
 *
 * <p>While holding this object's monitor the method removes the task from the running set,
 * swaps {@code taskFinishedFuture}, releases the task's node lease and, unless the partition
 * is already finished or the scheduler is closed, reacts to the terminal state:
 * <ul>
 *   <li>{@code FINISHED}: marks the partition finished, notifies the sink exchange (when
 *       present) and aborts every remote task registered for the partition;</li>
 *   <li>{@code CANCELED} / {@code ABORTED}: logged only;</li>
 *   <li>{@code FAILED}: re-queues the partition when both the overall and per-task retry
 *       budgets are positive and the error is not a {@code USER_ERROR}; otherwise records
 *       the failure.</li>
 * </ul>
 * After leaving the monitor, a recorded failure is passed to {@code fail} and the previously
 * captured future is completed. Any {@link Throwable} raised along the way is passed to
 * {@code fail} as well.
 *
 * @param taskStatus the latest status reported for the task
 * @param exchangeSinkInstanceHandle sink instance handle of the task; must be present when
 *        the task finished and a sink exchange is configured
 */
private void updateTaskStatus(TaskStatus taskStatus, Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle) {
    TaskState state = taskStatus.getState();
    if (!state.isDone()) {
        return;
    }
    try {
        RuntimeException failure = null;
        SettableFuture<Void> future;
        synchronized (this) {
            TaskId taskId = taskStatus.getTaskId();
            runningTasks.remove(taskId);
            // Capture the current future so it can be completed after the lock is released,
            // and install a fresh one only while other tasks are still running.
            future = taskFinishedFuture;
            if (!runningTasks.isEmpty()) {
                taskFinishedFuture = SettableFuture.create();
            } else {
                taskFinishedFuture = null;
            }
            NodeAllocator.NodeLease nodeLease = requireNonNull(runningNodes.remove(taskId), () -> "node not found for task id: " + taskId);
            nodeLease.release();
            int partitionId = taskId.getPartitionId();
            // Outcomes for already finished partitions, or after close, are not acted upon.
            if (!finishedPartitions.contains(partitionId) && !closed) {
                switch(state) {
                    case FINISHED:
                        finishedPartitions.add(partitionId);
                        if (sinkExchange.isPresent()) {
                            checkArgument(exchangeSinkInstanceHandle.isPresent(), "exchangeSinkInstanceHandle is expected to be present");
                            sinkExchange.get().sinkFinished(exchangeSinkInstanceHandle.get());
                        }
                        // abort all remote tasks registered for this partition
                        partitionToRemoteTaskMap.get(partitionId).forEach(RemoteTask::abort);
                        break;
                    case CANCELED:
                        log.debug("Task cancelled: %s", taskId);
                        break;
                    case ABORTED:
                        log.debug("Task aborted: %s", taskId);
                        break;
                    case FAILED:
                        // Build the generic fallback failure lazily: it is only needed when the
                        // status carries no failures, and constructing an exception is not free.
                        ExecutionFailureInfo failureInfo = taskStatus.getFailures().stream().findFirst().map(this::rewriteTransportFailure).orElseGet(() -> toFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "A task failed for an unknown reason")));
                        log.warn(failureInfo.toException(), "Task failed: %s", taskId);
                        ErrorCode errorCode = failureInfo.getErrorCode();
                        int taskRemainingAttempts = remainingAttemptsPerTask.getOrDefault(partitionId, maxRetryAttemptsPerTask);
                        // Retry only while both budgets last and the error is not a user error.
                        if (remainingRetryAttemptsOverall > 0 && taskRemainingAttempts > 0 && (errorCode == null || errorCode.getType() != USER_ERROR)) {
                            remainingRetryAttemptsOverall--;
                            remainingAttemptsPerTask.put(partitionId, taskRemainingAttempts - 1);
                            // update memory limits for next attempt
                            MemoryRequirements memoryLimits = partitionMemoryRequirements.get(partitionId);
                            verify(memoryLimits != null);
                            MemoryRequirements newMemoryLimits = partitionMemoryEstimator.getNextRetryMemoryRequirements(session, memoryLimits, errorCode);
                            partitionMemoryRequirements.put(partitionId, newMemoryLimits);
                            // reschedule
                            queuedPartitions.add(partitionId);
                            log.debug("Retrying partition %s for stage %s", partitionId, stage.getStageId());
                        } else {
                            failure = failureInfo.toException();
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected task state: " + state);
                }
            }
        }
        if (failure != null) {
            // must be called outside the lock
            fail(failure);
        }
        if (future != null && !future.isDone()) {
            future.set(null);
        }
    } catch (Throwable t) {
        fail(t);
    }
}
Also used : TaskId(io.trino.execution.TaskId) MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) RemoteTask(io.trino.execution.RemoteTask) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) TrinoException(io.trino.spi.TrinoException) ErrorCode(io.trino.spi.ErrorCode) MoreFutures.asVoid(io.airlift.concurrent.MoreFutures.asVoid) TaskState(io.trino.execution.TaskState)

Example 3 with TaskId

use of io.trino.execution.TaskId in project trino by trinodb.

the class TestBroadcastOutputBuffer method createBroadcastBuffer.

/**
 * Builds a {@code BroadcastOutputBuffer} for tests and applies the given output buffers to it.
 *
 * <p>The buffer's state machine is created for a fixed task id (query "query", stage 0,
 * partition 0, attempt 0) and uses the {@code stateNotificationExecutor} field, while the
 * buffer itself receives the {@code notificationExecutor} argument.
 *
 * @param outputBuffers output buffer configuration to set on the new buffer
 * @param dataSize size passed to the buffer constructor
 * @param memoryContext aggregated context from which the buffer obtains its "test" local context
 * @param notificationExecutor executor passed to the buffer constructor
 * @return the configured buffer
 */
private BroadcastOutputBuffer createBroadcastBuffer(OutputBuffers outputBuffers, DataSize dataSize, AggregatedMemoryContext memoryContext, Executor notificationExecutor) {
    TaskId fixedTaskId = new TaskId(new StageId(new QueryId("query"), 0), 0, 0);
    OutputBufferStateMachine stateMachine = new OutputBufferStateMachine(fixedTaskId, stateNotificationExecutor);
    BroadcastOutputBuffer broadcastBuffer = new BroadcastOutputBuffer(
            TASK_INSTANCE_ID,
            stateMachine,
            dataSize,
            () -> memoryContext.newLocalMemoryContext("test"),
            notificationExecutor,
            () -> {
            });
    broadcastBuffer.setOutputBuffers(outputBuffers);
    return broadcastBuffer;
}
Also used : TaskId(io.trino.execution.TaskId) StageId(io.trino.execution.StageId) QueryId(io.trino.spi.QueryId)

Example 4 with TaskId

use of io.trino.execution.TaskId in project trino by trinodb.

the class TestDirectExchangeClient method setUpDataCorruption.

/**
 * Creates a {@code DirectExchangeClient} whose HTTP processor corrupts the first response.
 *
 * <p>The backing mock processor serves two pages for {@code location} and is then marked
 * complete. The wrapping processor behaves as follows:
 * <ol>
 *   <li>first request: fetches the real response, keeps an intact copy, and returns a copy
 *       whose byte at index 42 has been incremented;</li>
 *   <li>second request: returns the intact copy saved by the first request;</li>
 *   <li>later requests: forwarded to the backing processor unchanged.</li>
 * </ol>
 *
 * @param dataIntegrityVerification verification mode handed to the client
 * @param location the location registered with the client and the mock processor
 * @return a client with {@code location} added and no more locations expected
 */
private DirectExchangeClient setUpDataCorruption(DataIntegrityVerification dataIntegrityVerification, URI location) {
    DataSize responseLimit = DataSize.of(10, Unit.MEGABYTE);
    MockExchangeRequestProcessor backend = new MockExchangeRequestProcessor(responseLimit);
    backend.addPage(location, createPage(1));
    backend.addPage(location, createPage(2));
    backend.setComplete(location);

    TestingHttpClient.Processor corruptingProcessor = new TestingHttpClient.Processor() {

        private int requestCount;

        private TestingResponse intactResponse;

        @Override
        public synchronized Response handle(Request request) throws Exception {
            switch (requestCount) {
                case 0:
                    return corruptFirstResponse(request);
                case 1:
                    return replayIntactResponse();
                default:
                    requestCount++;
                    return backend.handle(request);
            }
        }

        // Fetches the real response, stores it untouched, and hands back a corrupted copy.
        private Response corruptFirstResponse(Request request) throws Exception {
            verify(intactResponse == null);
            TestingResponse upstream = (TestingResponse) backend.handle(request);
            checkState(upstream.getStatusCode() == HttpStatus.OK.code(), "Unexpected status code: %s", upstream.getStatusCode());
            ListMultimap<String, String> headers = upstream.getHeaders().entries().stream()
                    .collect(toImmutableListMultimap(entry -> entry.getKey().toString(), Map.Entry::getValue));
            byte[] payload = toByteArray(upstream.getInputStream());
            checkState(payload.length > 42, "too short");
            byte[] pristine = payload.clone();
            intactResponse = new TestingResponse(HttpStatus.OK, headers, pristine);
            // corrupt
            payload[42]++;
            requestCount++;
            return new TestingResponse(HttpStatus.OK, headers, payload);
        }

        // Returns the response saved by the first request and clears the saved reference.
        private Response replayIntactResponse() {
            verify(intactResponse != null);
            Response replay = intactResponse;
            intactResponse = null;
            requestCount++;
            return replay;
        }
    };

    DirectExchangeClient client = new DirectExchangeClient(
            "localhost",
            dataIntegrityVerification,
            new StreamingDirectExchangeBuffer(scheduler, DataSize.of(32, Unit.MEGABYTE)),
            responseLimit,
            1,
            new Duration(1, TimeUnit.MINUTES),
            true,
            new TestingHttpClient(corruptingProcessor, scheduler),
            scheduler,
            new SimpleLocalMemoryContext(newSimpleAggregatedMemoryContext(), "test"),
            pageBufferClientCallbackExecutor,
            (taskId, failure) -> {
            });
    client.addLocation(new TaskId(new StageId("query", 1), 0, 0), location);
    client.noMoreLocations();
    return client;
}
Also used : QueryId(io.trino.spi.QueryId) ListMultimap(com.google.common.collect.ListMultimap) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) TimeoutException(java.util.concurrent.TimeoutException) BlockAssertions(io.trino.block.BlockAssertions) Test(org.testng.annotations.Test) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) Unit(io.airlift.units.DataSize.Unit) Duration(io.airlift.units.Duration) ImmutableListMultimap.toImmutableListMultimap(com.google.common.collect.ImmutableListMultimap.toImmutableListMultimap) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) Map(java.util.Map) URI(java.net.URI) Assert.assertFalse(org.testng.Assert.assertFalse) ImmutableSet(com.google.common.collect.ImmutableSet) SimpleLocalMemoryContext(io.trino.memory.context.SimpleLocalMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) BeforeClass(org.testng.annotations.BeforeClass) TestingHttpClient(io.airlift.http.client.testing.TestingHttpClient) Set(java.util.Set) PagesSerde.getSerializedPagePositionCount(io.trino.execution.buffer.PagesSerde.getSerializedPagePositionCount) TrinoException(io.trino.spi.TrinoException) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Assert.assertNotNull(org.testng.Assert.assertNotNull) PagesSerde(io.trino.execution.buffer.PagesSerde) Uninterruptibles.sleepUninterruptibly(com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly) TaskId(io.trino.execution.TaskId) Executors(java.util.concurrent.Executors) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) TrinoTransportException(io.trino.spi.TrinoTransportException) CountDownLatch(java.util.concurrent.CountDownLatch) DataSize(io.airlift.units.DataSize) List(java.util.List) TestingPagesSerdeFactory.testingPagesSerde(io.trino.execution.buffer.TestingPagesSerdeFactory.testingPagesSerde) 
ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Slice(io.airlift.slice.Slice) StageId(io.trino.execution.StageId) Assert.assertNull(org.testng.Assert.assertNull) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) ExchangeHandleResolver(io.trino.metadata.ExchangeHandleResolver) ExchangeId.createRandomExchangeId(io.trino.spi.exchange.ExchangeId.createRandomExchangeId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Verify.verify(com.google.common.base.Verify.verify) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) Request(io.airlift.http.client.Request) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TestingResponse(io.airlift.http.client.testing.TestingResponse) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Assertions.assertLessThan(io.airlift.testing.Assertions.assertLessThan) ExecutorService(java.util.concurrent.ExecutorService) AfterClass(org.testng.annotations.AfterClass) Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) MoreFutures.tryGetFutureValue(io.airlift.concurrent.MoreFutures.tryGetFutureValue) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) ByteStreams.toByteArray(com.google.common.io.ByteStreams.toByteArray) DataIntegrityVerification(io.trino.FeaturesConfig.DataIntegrityVerification) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) HttpStatus(io.airlift.http.client.HttpStatus) Assert.assertEventually(io.trino.testing.assertions.Assert.assertEventually) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) 
Response(io.airlift.http.client.Response) Assert.assertTrue(org.testng.Assert.assertTrue) SECONDS(java.util.concurrent.TimeUnit.SECONDS) ExchangeManagerRegistry(io.trino.exchange.ExchangeManagerRegistry) TestingResponse(io.airlift.http.client.testing.TestingResponse) SimpleLocalMemoryContext(io.trino.memory.context.SimpleLocalMemoryContext) TaskId(io.trino.execution.TaskId) StageId(io.trino.execution.StageId) Request(io.airlift.http.client.Request) Duration(io.airlift.units.Duration) TestingResponse(io.airlift.http.client.testing.TestingResponse) Response(io.airlift.http.client.Response) DataSize(io.airlift.units.DataSize) TestingHttpClient(io.airlift.http.client.testing.TestingHttpClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 5 with TaskId

use of io.trino.execution.TaskId in project trino by trinodb.

the class TestDirectExchangeClient method testStreamingTaskFailure.

/**
 * Verifies that, with a streaming buffer, a failure reported for one location surfaces from
 * the client even after another location has already delivered its page and completed, and
 * that the client is not reported as finished afterwards.
 */
@Test
public void testStreamingTaskFailure() {
    DataSize responseLimit = DataSize.of(10, Unit.MEGABYTE);
    MockExchangeRequestProcessor requestProcessor = new MockExchangeRequestProcessor(responseLimit);

    TaskId healthyTask = new TaskId(new StageId("query", 1), 0, 0);
    TaskId failingTask = new TaskId(new StageId("query", 1), 1, 0);
    URI healthyLocation = URI.create("http://localhost:8080/1");
    URI failingLocation = URI.create("http://localhost:8080/2");
    requestProcessor.addPage(healthyLocation, createPage(1));

    StreamingDirectExchangeBuffer streamingBuffer = new StreamingDirectExchangeBuffer(scheduler, DataSize.of(1, Unit.MEGABYTE));
    DirectExchangeClient client = new DirectExchangeClient(
            "localhost",
            DataIntegrityVerification.ABORT,
            streamingBuffer,
            responseLimit,
            1,
            new Duration(1, SECONDS),
            true,
            new TestingHttpClient(requestProcessor, scheduler),
            scheduler,
            new SimpleLocalMemoryContext(newSimpleAggregatedMemoryContext(), "test"),
            pageBufferClientCallbackExecutor,
            (taskId, failure) -> {
            });
    client.addLocation(healthyTask, healthyLocation);
    client.addLocation(failingTask, failingLocation);

    // The only page comes from the first location.
    assertPageEquals(getNextPage(client), createPage(1));

    // With the first location complete and the second silent, the blocked future
    // must not resolve within the short wait.
    requestProcessor.setComplete(healthyLocation);
    assertFalse(tryGetFutureValue(client.isBlocked(), 10, MILLISECONDS).isPresent());

    // Fail the second location and expect the error to reach the caller.
    RuntimeException injectedFailure = new RuntimeException("randomfailure");
    requestProcessor.setFailed(failingLocation, injectedFailure);
    assertThatThrownBy(() -> getNextPage(client)).hasMessageContaining("Encountered too many errors talking to a worker node");
    assertFalse(client.isFinished());
}
Also used : TaskId(io.trino.execution.TaskId) SimpleLocalMemoryContext(io.trino.memory.context.SimpleLocalMemoryContext) DataSize(io.airlift.units.DataSize) StageId(io.trino.execution.StageId) TestingHttpClient(io.airlift.http.client.testing.TestingHttpClient) Duration(io.airlift.units.Duration) URI(java.net.URI) Test(org.testng.annotations.Test)

Aggregations

TaskId (io.trino.execution.TaskId)59 StageId (io.trino.execution.StageId)44 Test (org.testng.annotations.Test)42 QueryId (io.trino.spi.QueryId)26 Duration (io.airlift.units.Duration)23 DataSize (io.airlift.units.DataSize)14 SimpleLocalMemoryContext (io.trino.memory.context.SimpleLocalMemoryContext)13 URI (java.net.URI)13 DynamicFilterId (io.trino.sql.planner.plan.DynamicFilterId)12 TestingHttpClient (io.airlift.http.client.testing.TestingHttpClient)11 Slice (io.airlift.slice.Slice)10 DynamicFilter (io.trino.spi.connector.DynamicFilter)9 TestingTicker (io.airlift.testing.TestingTicker)8 Phaser (java.util.concurrent.Phaser)8 ImmutableList (com.google.common.collect.ImmutableList)7 TestingColumnHandle (io.trino.spi.connector.TestingColumnHandle)7 Symbol (io.trino.sql.planner.Symbol)7 SymbolAllocator (io.trino.sql.planner.SymbolAllocator)7 Optional (java.util.Optional)7 TrinoException (io.trino.spi.TrinoException)6