Search in sources :

Example 1 with PartitionProducerStateChecker

use of org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker in project flink by apache.

the class TaskTest method testTriggerPartitionStateUpdate.

/**
 * Tests the trigger partition state update future completions.
 */
@Test
public void testTriggerPartitionStateUpdate() throws Exception {
    final IntermediateDataSetID resultId = new IntermediateDataSetID();
    final ResultPartitionID partitionId = new ResultPartitionID();
    final PartitionProducerStateChecker partitionChecker = mock(PartitionProducerStateChecker.class);
    final ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
    AtomicInteger callCount = new AtomicInteger(0);
    RemoteChannelStateChecker remoteChannelStateChecker = new RemoteChannelStateChecker(partitionId, "test task");
    // Test all branches of trigger partition state check
    {
        // Reset latches
        setup();
        // PartitionProducerDisposedException
        final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
        TestTaskBuilder.setTaskState(task, ExecutionState.RUNNING);
        final CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
        when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
        task.requestPartitionProducerState(resultId, partitionId, checkResult -> assertThat(remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult), is(false)));
        promise.completeExceptionally(new PartitionProducerDisposedException(partitionId));
        assertEquals(ExecutionState.CANCELING, task.getExecutionState());
    }
    {
        // Reset latches
        setup();
        // Any other exception
        final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
        TestTaskBuilder.setTaskState(task, ExecutionState.RUNNING);
        final CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
        when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
        task.requestPartitionProducerState(resultId, partitionId, checkResult -> assertThat(remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult), is(false)));
        promise.completeExceptionally(new RuntimeException("Any other exception"));
        assertEquals(ExecutionState.FAILED, task.getExecutionState());
    }
    {
        callCount.set(0);
        // Reset latches
        setup();
        // TimeoutException handled special => retry
        // Any other exception
        final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
        try {
            task.startTaskThread();
            awaitLatch.await();
            CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
            when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
            task.requestPartitionProducerState(resultId, partitionId, checkResult -> {
                if (remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult)) {
                    callCount.incrementAndGet();
                }
            });
            promise.completeExceptionally(new TimeoutException());
            assertEquals(ExecutionState.RUNNING, task.getExecutionState());
            assertEquals(1, callCount.get());
        } finally {
            task.getExecutingThread().interrupt();
            task.getExecutingThread().join();
        }
    }
    {
        callCount.set(0);
        // Reset latches
        setup();
        // Success
        final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
        try {
            task.startTaskThread();
            awaitLatch.await();
            CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
            when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
            task.requestPartitionProducerState(resultId, partitionId, checkResult -> {
                if (remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult)) {
                    callCount.incrementAndGet();
                }
            });
            promise.complete(ExecutionState.RUNNING);
            assertEquals(ExecutionState.RUNNING, task.getExecutionState());
            assertEquals(1, callCount.get());
        } finally {
            task.getExecutingThread().interrupt();
            task.getExecutingThread().join();
        }
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) ArgumentMatchers.eq(org.mockito.ArgumentMatchers.eq) RemoteChannelStateChecker(org.apache.flink.runtime.io.network.partition.consumer.RemoteChannelStateChecker) TimeoutException(java.util.concurrent.TimeoutException) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) Assert.assertThat(org.junit.Assert.assertThat) Mockito.doThrow(org.mockito.Mockito.doThrow) WrappingRuntimeException(org.apache.flink.util.WrappingRuntimeException) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) ClassRule(org.junit.ClassRule) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) BlockingQueue(java.util.concurrent.BlockingQueue) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) List(java.util.List) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Assert.assertFalse(org.junit.Assert.assertFalse) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Environment(org.apache.flink.runtime.execution.Environment) Mockito.mock(org.mockito.Mockito.mock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CheckpointFailureReason(org.apache.flink.runtime.checkpoint.CheckpointFailureReason) Mockito.spy(org.mockito.Mockito.spy) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) Nonnull(javax.annotation.Nonnull) Before(org.junit.Before) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) Executors(org.apache.flink.util.concurrent.Executors) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) Assert.assertNull(org.junit.Assert.assertNull) PartitionDescriptor(org.apache.flink.runtime.shuffle.PartitionDescriptor) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) RemoteChannelStateChecker(org.apache.flink.runtime.io.network.partition.consumer.RemoteChannelStateChecker) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) CompletableFuture(java.util.concurrent.CompletableFuture) WrappingRuntimeException(org.apache.flink.util.WrappingRuntimeException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 2 with PartitionProducerStateChecker

use of org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker in project flink by apache.

the class TaskAsyncCallTest method createTask.

private Task createTask(Class<? extends AbstractInvokable> invokableClass) throws Exception {
    final TestingClassLoaderLease classLoaderHandle = TestingClassLoaderLease.newBuilder().setGetOrResolveClassLoaderFunction((permanentBlobKeys, urls) -> TestingUserCodeClassLoader.newBuilder().setClassLoader(new TestUserCodeClassLoader()).build()).build();
    ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
    PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
    Executor executor = mock(Executor.class);
    TaskMetricGroup taskMetricGroup = UnregisteredMetricGroups.createUnregisteredTaskMetricGroup();
    JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.emptyList(), Collections.emptyList());
    TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, invokableClass.getName(), new Configuration());
    return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), mock(MemoryManager.class), mock(IOManager.class), shuffleEnvironment, new KvStateService(new KvStateRegistry(), null, null), mock(BroadcastVariableManager.class), new TaskEventDispatcher(), ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES, new TestTaskStateManager(), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new NoOpTaskOperatorEventGateway(), new TestGlobalAggregateManager(), classLoaderHandle, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(), taskMetricGroup, consumableNotifier, partitionProducerStateChecker, executor);
}
Also used : CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) Assert.assertThat(org.junit.Assert.assertThat) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) Future(java.util.concurrent.Future) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) KvStateService(org.apache.flink.runtime.taskexecutor.KvStateService) After(org.junit.After) TestGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.TestGlobalAggregateManager) TestLogger(org.apache.flink.util.TestLogger) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) Matchers.isOneOf(org.hamcrest.Matchers.isOneOf) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) SerializedValue(org.apache.flink.util.SerializedValue) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Assert.assertFalse(org.junit.Assert.assertFalse) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Environment(org.apache.flink.runtime.execution.Environment) Mockito.mock(org.mockito.Mockito.mock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CompletableFuture(java.util.concurrent.CompletableFuture) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) Before(org.junit.Before) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Executor(java.util.concurrent.Executor) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) FileCache(org.apache.flink.runtime.filecache.FileCache) Test(org.junit.Test) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingUserCodeClassLoader(org.apache.flink.runtime.util.TestingUserCodeClassLoader) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) Collections(java.util.Collections) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KvStateService(org.apache.flink.runtime.taskexecutor.KvStateService) Executor(java.util.concurrent.Executor) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TestGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.TestGlobalAggregateManager) FileCache(org.apache.flink.runtime.filecache.FileCache) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID)

Example 3 with PartitionProducerStateChecker

use of org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker in project flink by apache.

the class SynchronousCheckpointITCase method createTask.

// --------------------------		Boilerplate tools copied from the TaskAsyncCallTest
// --------------------------
private Task createTask(Class<? extends TaskInvokable> invokableClass) throws Exception {
    ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
    PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
    Executor executor = mock(Executor.class);
    ShuffleEnvironment<?, ?> shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();
    TaskMetricGroup taskMetricGroup = UnregisteredMetricGroups.createUnregisteredTaskMetricGroup();
    JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.emptyList(), Collections.emptyList());
    TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, invokableClass.getName(), new Configuration());
    return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), mock(MemoryManager.class), mock(IOManager.class), shuffleEnvironment, new KvStateService(new KvStateRegistry(), null, null), mock(BroadcastVariableManager.class), new TaskEventDispatcher(), ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES, new TestTaskStateManager(), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new NoOpTaskOperatorEventGateway(), new TestGlobalAggregateManager(), TestingClassLoaderLease.newBuilder().build(), mock(FileCache.class), new TestingTaskManagerRuntimeInfo(), taskMetricGroup, consumableNotifier, partitionProducerStateChecker, executor);
}
Also used : KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) Task(org.apache.flink.runtime.taskmanager.Task) Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KvStateService(org.apache.flink.runtime.taskexecutor.KvStateService) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) NoOpTaskOperatorEventGateway(org.apache.flink.runtime.taskmanager.NoOpTaskOperatorEventGateway) Executor(java.util.concurrent.Executor) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TestGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.TestGlobalAggregateManager) FileCache(org.apache.flink.runtime.filecache.FileCache) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID)

Example 4 with PartitionProducerStateChecker

use of org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker in project flink by apache.

the class RemoteInputChannelTest method testOnFailedPartitionRequestDoesNotBlockNetworkThreads.

/**
 * Test to guard against FLINK-13249.
 */
@Test
public void testOnFailedPartitionRequestDoesNotBlockNetworkThreads() throws Exception {
    final long testBlockedWaitTimeoutMillis = 30_000L;
    final PartitionProducerStateChecker partitionProducerStateChecker = (jobId, intermediateDataSetId, resultPartitionId) -> CompletableFuture.completedFuture(ExecutionState.RUNNING);
    final NettyShuffleEnvironment shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();
    final Task task = new TestTaskBuilder(shuffleEnvironment).setPartitionProducerStateChecker(partitionProducerStateChecker).build();
    final SingleInputGate inputGate = new SingleInputGateBuilder().setPartitionProducerStateProvider(task).build();
    TestTaskBuilder.setTaskState(task, ExecutionState.RUNNING);
    final OneShotLatch ready = new OneShotLatch();
    final OneShotLatch blocker = new OneShotLatch();
    final AtomicBoolean timedOutOrInterrupted = new AtomicBoolean(false);
    final ConnectionManager blockingConnectionManager = new TestingConnectionManager() {

        @Override
        public PartitionRequestClient createPartitionRequestClient(ConnectionID connectionId) {
            ready.trigger();
            try {
                // We block here, in a section that holds the
                // SingleInputGate#requestLock
                blocker.await(testBlockedWaitTimeoutMillis, TimeUnit.MILLISECONDS);
            } catch (InterruptedException | TimeoutException e) {
                timedOutOrInterrupted.set(true);
            }
            return new TestingPartitionRequestClient();
        }
    };
    final RemoteInputChannel remoteInputChannel = InputChannelBuilder.newBuilder().setConnectionManager(blockingConnectionManager).buildRemoteChannel(inputGate);
    inputGate.setInputChannels(remoteInputChannel);
    final Thread simulatedNetworkThread = new Thread(() -> {
        try {
            ready.await();
            // We want to make sure that our simulated network thread does not
            // block on
            // SingleInputGate#requestLock as well through this call.
            remoteInputChannel.onFailedPartitionRequest();
            // Will only give free the blocker if we did not block ourselves.
            blocker.trigger();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    });
    simulatedNetworkThread.start();
    // The entry point to that will lead us into
    // blockingConnectionManager#createPartitionRequestClient(...).
    inputGate.requestPartitions();
    simulatedNetworkThread.join();
    Assert.assertFalse("Test ended by timeout or interruption - this indicates that the network thread was blocked.", timedOutOrInterrupted.get());
}
Also used : TestTaskBuilder(org.apache.flink.runtime.taskmanager.TestTaskBuilder) Arrays(java.util.Arrays) Matchers.isA(org.hamcrest.Matchers.isA) AvailabilityUtil.assertAvailability(org.apache.flink.runtime.io.network.partition.AvailabilityUtil.assertAvailability) ProducerFailedException(org.apache.flink.runtime.io.network.partition.ProducerFailedException) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Random(java.util.Random) PartitionRequestClient(org.apache.flink.runtime.io.network.PartitionRequestClient) NetworkBuffer(org.apache.flink.runtime.io.network.buffer.NetworkBuffer) AvailabilityUtil.assertPriorityAvailability(org.apache.flink.runtime.io.network.partition.AvailabilityUtil.assertPriorityAvailability) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Future(java.util.concurrent.Future) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) CheckpointStorageLocationReference.getDefault(org.apache.flink.runtime.state.CheckpointStorageLocationReference.getDefault) Assert.fail(org.junit.Assert.fail) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) EventSerializer(org.apache.flink.runtime.io.network.api.serialization.EventSerializer) TestBufferFactory.createBuffer(org.apache.flink.runtime.io.network.util.TestBufferFactory.createBuffer) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) InputChannelTestUtils(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils) DataType(org.apache.flink.runtime.io.network.buffer.Buffer.DataType) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) FreeingBufferRecycler(org.apache.flink.runtime.io.network.buffer.FreeingBufferRecycler) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) Collectors(java.util.stream.Collectors) Buffer(org.apache.flink.runtime.io.network.buffer.Buffer) Executors(java.util.concurrent.Executors) Matchers.any(org.mockito.Matchers.any) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) Optional(java.util.Optional) TestingPartitionRequestClient(org.apache.flink.runtime.io.network.TestingPartitionRequestClient) Matchers.is(org.hamcrest.Matchers.is) Queue(java.util.Queue) Mockito.mock(org.mockito.Mockito.mock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) TestBufferFactory(org.apache.flink.runtime.io.network.util.TestBufferFactory) CHECKPOINT(org.apache.flink.runtime.checkpoint.CheckpointType.CHECKPOINT) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InputChannelTestUtils.createSingleInputGate(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) Mockito.spy(org.mockito.Mockito.spy) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) ArrayList(java.util.ArrayList) Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BufferBuilder(org.apache.flink.runtime.io.network.buffer.BufferBuilder) EventSerializer.toBuffer(org.apache.flink.runtime.io.network.api.serialization.EventSerializer.toBuffer) ChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) CheckpointOptions.alignedWithTimeout(org.apache.flink.runtime.checkpoint.CheckpointOptions.alignedWithTimeout) InputChannelInfo(org.apache.flink.runtime.checkpoint.channel.InputChannelInfo) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) MemorySegment(org.apache.flink.core.memory.MemorySegment) MemorySegmentFactory(org.apache.flink.core.memory.MemorySegmentFactory) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) Assert.assertNotNull(org.junit.Assert.assertNotNull) BufferPool(org.apache.flink.runtime.io.network.buffer.BufferPool) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) BufferBuilderTestUtils.buildSingleBuffer(org.apache.flink.runtime.io.network.buffer.BufferBuilderTestUtils.buildSingleBuffer) BufferAndAvailability(org.apache.flink.runtime.io.network.partition.consumer.InputChannel.BufferAndAvailability) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) NoOpBufferPool(org.apache.flink.runtime.io.network.buffer.NoOpBufferPool) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) PartitionProducerStateProvider(org.apache.flink.runtime.io.network.partition.PartitionProducerStateProvider) Assert.assertEquals(org.junit.Assert.assertEquals) Task(org.apache.flink.runtime.taskmanager.Task) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) InputChannelTestUtils.createSingleInputGate(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) TestTaskBuilder(org.apache.flink.runtime.taskmanager.TestTaskBuilder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) TestingPartitionRequestClient(org.apache.flink.runtime.io.network.TestingPartitionRequestClient) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 5 with PartitionProducerStateChecker

use of org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker in project flink by apache.

the class TaskTest method testExecutionFailsInNetworkRegistration.

private void testExecutionFailsInNetworkRegistration(List<ResultPartitionDeploymentDescriptor> resultPartitions, List<InputGateDeploymentDescriptor> inputGates) throws Exception {
    final String errorMessage = "Network buffer pool has already been destroyed.";
    final ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
    final PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
    final QueuedNoOpTaskManagerActions taskManagerActions = new QueuedNoOpTaskManagerActions();
    final Task task = new TestTaskBuilder(shuffleEnvironment).setTaskManagerActions(taskManagerActions).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionProducerStateChecker).setResultPartitions(resultPartitions).setInputGates(inputGates).build();
    // shut down the network to make the following task registration failure
    shuffleEnvironment.close();
    // should fail
    task.run();
    // verify final state
    assertEquals(ExecutionState.FAILED, task.getExecutionState());
    assertTrue(task.isCanceledOrFailed());
    assertTrue(task.getFailureCause().getMessage().contains(errorMessage));
    taskManagerActions.validateListenerMessage(ExecutionState.FAILED, task, new IllegalStateException(errorMessage));
}
Also used : PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier)

Aggregations

PartitionProducerStateChecker (org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker)5 NettyShuffleEnvironmentBuilder (org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder)4 NoOpResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier)4 ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier)4 CompletableFuture (java.util.concurrent.CompletableFuture)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)3 Configuration (org.apache.flink.configuration.Configuration)3 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)3 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)3 IOException (java.io.IOException)2 Collections (java.util.Collections)2 List (java.util.List)2 Executor (java.util.concurrent.Executor)2 Future (java.util.concurrent.Future)2 TimeoutException (java.util.concurrent.TimeoutException)2 JobID (org.apache.flink.api.common.JobID)2 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)2 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)2 InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor)2 ResultPartitionDeploymentDescriptor (org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor)2