Search in sources :

Example 86 with OneShotLatch

use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Tests that a concurrent cancel operation does not discard the state handles of an
 * acknowledged checkpoint. The situation can only happen if the cancel call is executed after
 * Environment.acknowledgeCheckpoint() and before the CloseableRegistry.unregisterCloseable()
 * call.
 *
 * <p>The test forces this interleaving with two latches: the mocked {@code CheckpointResponder}
 * signals that {@code acknowledgeCheckpoint} has been entered and then blocks until the test has
 * called {@code cancel()} on the task.
 */
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
    // triggered by the mocked responder as soon as acknowledgeCheckpoint() has been entered
    final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
    // triggered by the test after cancel() to let the blocked acknowledgeCheckpoint() return
    final OneShotLatch completeAcknowledge = new OneShotLatch();
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) {
            acknowledgeCheckpointLatch.trigger();
            // block here so that we can issue the concurrent cancel call
            while (true) {
                try {
                    // wait until we successfully await (no pun intended)
                    completeAcknowledge.await();
                    // when await() returns normally, we break out of the loop
                    break;
                } catch (InterruptedException e) {
                    // Deliberately ignored and retried: survive interruptions that arise from
                    // thread pool shutdown. Production code cannot actually throw
                    // InterruptedException from checkpoint acknowledgement.
                }
            }
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), any(TaskStateSnapshot.class));
    // the task state manager is constructed with the mocked (blocking) responder
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    // mocked handles so that discardState() invocations can be verified below
    KeyedStateHandle managedKeyedStateHandle = mock(KeyedStateHandle.class);
    KeyedStateHandle rawKeyedStateHandle = mock(KeyedStateHandle.class);
    OperatorStateHandle managedOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    OperatorStateHandle rawOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    // operator snapshot result wrapping the four mocked handles plus two empty results
    OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(rawKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskName("mock-task").setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult))));
        MockStreamTask streamTask = task.streamTask;
        waitTaskIsRunning(streamTask, task.invocationFuture);
        final long checkpointId = 42L;
        streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        // returns once the mocked responder has been called; that call stays blocked until
        // completeAcknowledge is triggered further down
        acknowledgeCheckpointLatch.await();
        ArgumentCaptor<TaskStateSnapshot> subtaskStateCaptor = ArgumentCaptor.forClass(TaskStateSnapshot.class);
        // check that the checkpoint has been completed
        verify(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
        TaskStateSnapshot subtaskStates = subtaskStateCaptor.getValue();
        // only the first mapping is inspected
        OperatorSubtaskState subtaskState = subtaskStates.getSubtaskStateMappings().iterator().next().getValue();
        // check that the subtask state contains the expected state handles
        assertEquals(singleton(managedKeyedStateHandle), subtaskState.getManagedKeyedState());
        assertEquals(singleton(rawKeyedStateHandle), subtaskState.getRawKeyedState());
        assertEquals(singleton(managedOperatorStateHandle), subtaskState.getManagedOperatorState());
        assertEquals(singleton(rawOperatorStateHandle), subtaskState.getRawOperatorState());
        // check that the state handles have not been discarded
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        // cancel while the acknowledgement is still blocked inside the mocked responder,
        // then release the blocked acknowledgement
        streamTask.cancel();
        completeAcknowledge.trigger();
        // canceling the stream task after it has acknowledged the checkpoint should not discard
        // the state handles
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        // NOTE(review): the 'true' flag presumably tells the helper that the task was
        // cancelled on purpose -- confirm against RunningTask#waitForTaskCompletion
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 87 with OneShotLatch

use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

the class StreamTaskTest method testEmptySubtaskStateLeadsToStatelessAcknowledgment.

/**
 * FLINK-5985
 *
 * <p>This test ensures that empty snapshots (no operator/keyed state whatsoever) will be reported
 * as stateless tasks: the {@link TaskStateSnapshot} argument handed to
 * {@code acknowledgeCheckpoint} is expected to be {@code null}.
 */
@Test
public void testEmptySubtaskStateLeadsToStatelessAcknowledgment() throws Exception {
    // latch blocks until the async checkpoint thread acknowledges
    final OneShotLatch checkpointCompletedLatch = new OneShotLatch();
    // Records the snapshot argument seen by the mocked responder. It is typed as
    // TaskStateSnapshot (the type the stubbing below matches on) so that a non-null snapshot
    // reaches the assertion instead of failing with a ClassCastException inside the answer.
    final List<TaskStateSnapshot> checkpointResult = new ArrayList<>(1);
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            // index 4 is the (nullable) TaskStateSnapshot parameter of acknowledgeCheckpoint
            TaskStateSnapshot reportedSnapshot = invocation.getArgument(4);
            checkpointResult.add(reportedSnapshot);
            // publish the result only after it has been recorded
            checkpointCompletedLatch.trigger();
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), nullable(TaskStateSnapshot.class));
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    // mock the operator with empty snapshot result (all state handles are null)
    OneInputStreamOperator<String, String> statelessOperator = streamOperatorWithSnapshot(new OperatorSnapshotFutures());
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(statelessOperator)));
        waitTaskIsRunning(task.streamTask, task.invocationFuture);
        task.streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        // bounded wait so that a missing acknowledgement cannot hang the test
        checkpointCompletedLatch.await(30, TimeUnit.SECONDS);
        // ensure that 'null' was acknowledged as subtask state
        Assert.assertNull(checkpointResult.get(0));
        task.streamTask.cancel();
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) ArrayList(java.util.ArrayList) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 88 with OneShotLatch

use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

the class SystemProcessingTimeServiceTest method testShutdownAndWaitPending.

/**
 * Checks {@code shutdownAndAwaitPending}: the first call is expected to return {@code false}
 * while the latch has not been triggered, and the second call is expected to return
 * {@code true} once the latch has been triggered.
 */
@Test
public void testShutdownAndWaitPending() {
    final OneShotLatch timerGate = new OneShotLatch();
    final AtomicBoolean timerCompleted = new AtomicBoolean(false);
    final SystemProcessingTimeService service =
            createBlockingSystemProcessingTimeService(timerGate, timerCompleted);

    Assert.assertFalse(service.isTerminated());

    try {
        // The latch has not been triggered yet, so this attempt should time out.
        Assert.assertFalse(service.shutdownAndAwaitPending(1, TimeUnit.SECONDS));

        // Open the gate so the timer can proceed.
        timerGate.trigger();

        // This time the shutdown should succeed in terminating the timer.
        Assert.assertTrue(service.shutdownAndAwaitPending(60, TimeUnit.SECONDS));
    } catch (InterruptedException e) {
        Assert.fail("Unexpected interruption.");
    }

    Assert.assertTrue(timerCompleted.get());
    Assert.assertTrue(service.isTerminated());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Example 89 with OneShotLatch

use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

the class SourceStreamTaskTestBase method testMetrics.

/**
 * Triggers a checkpoint, lets a fixed amount of time pass before the mailbox is run, and then
 * checks the checkpoint start delay (both the gauge and the value in the acknowledge report)
 * as well as the busy-time gauge.
 *
 * @param taskFactory creates the stream task under test
 * @param operatorFactory operator factory used for the singleton operator chain
 * @param busyTimeMatcher matcher applied to the value of the task busy-time gauge
 * @throws Exception if the harness or the task fails
 */
public void testMetrics(FunctionWithException<Environment, ? extends StreamTask<Integer, ?>, Exception> taskFactory, StreamOperatorFactory<?> operatorFactory, Matcher<Double> busyTimeMatcher) throws Exception {
    final long delayMillis = 42;
    // lower bound for the reported start delay, in nanoseconds
    final long minStartDelayNanos = delayMillis * 1_000_000;

    final Map<String, Metric> registeredMetrics = new ConcurrentHashMap<>();
    final TaskMetricGroup metricGroup = StreamTaskTestHarness.createTaskMetricGroup(registeredMetrics);
    StreamTaskMailboxTestHarnessBuilder<Integer> harnessBuilder =
            new StreamTaskMailboxTestHarnessBuilder<>(taskFactory, INT_TYPE_INFO);

    try (StreamTaskMailboxTestHarness<Integer> harness =
            harnessBuilder
                    .setupOutputForSingletonOperatorChain(operatorFactory)
                    .setTaskMetricGroup(metricGroup)
                    .build()) {
        Future<Boolean> checkpointTrigger =
                harness.streamTask.triggerCheckpointAsync(
                        new CheckpointMetaData(1L, System.currentTimeMillis()),
                        CheckpointOptions.forCheckpointWithDefaultLocation());

        OneShotLatch ackLatch = new OneShotLatch();
        harness.getCheckpointResponder().setAcknowledgeLatch(ackLatch);

        // the trigger must still be pending before any mailbox step is run
        assertFalse(checkpointTrigger.isDone());

        // let some time pass before the trigger is processed
        Thread.sleep(delayMillis);
        while (!checkpointTrigger.isDone()) {
            harness.streamTask.runMailboxStep();
        }

        Gauge<Long> startDelayGauge =
                (Gauge<Long>) registeredMetrics.get(MetricNames.CHECKPOINT_START_DELAY_TIME);
        assertThat(startDelayGauge.getValue(), greaterThanOrEqualTo(minStartDelayNanos));

        Gauge<Double> busyGauge = (Gauge<Double>) registeredMetrics.get(MetricNames.TASK_BUSY_TIME);
        assertThat(busyGauge.getValue(), busyTimeMatcher);

        // the acknowledge report is only inspected after the acknowledge latch has fired
        ackLatch.await();
        TestCheckpointResponder.AcknowledgeReport report =
                Iterables.getOnlyElement(harness.getCheckpointResponder().getAcknowledgeReports());
        assertThat(
                report.getCheckpointMetrics().getCheckpointStartDelayNanos(),
                greaterThanOrEqualTo(minStartDelayNanos));
    }
}
Also used : TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Gauge(org.apache.flink.metrics.Gauge) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Metric(org.apache.flink.metrics.Metric) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 90 with OneShotLatch

use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testWaitingForUnalignedChannelStatesIfFinishedOnRestore.

/**
 * Verifies that, for a task that finished on restore, the asynchronous part of an unaligned
 * checkpoint waits for the channel state futures to complete, which means the barriers are
 * aligned.
 */
@Test
public void testWaitingForUnalignedChannelStatesIfFinishedOnRestore() throws Exception {
    OperatorID chainedOperatorId = new OperatorID();
    try (StreamTaskMailboxTestHarness<String> harness =
            new StreamTaskMailboxTestHarnessBuilder<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO)
                    .modifyStreamConfig(config -> config.setUnalignedCheckpointsEnabled(true))
                    .addInput(BasicTypeInfo.STRING_TYPE_INFO, 3)
                    .setCollectNetworkEvents()
                    .setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE)
                    .setupOperatorChain(new TestFinishedOnRestoreStreamOperator())
                    .chain(chainedOperatorId, new TestFinishedOnRestoreStreamOperator(chainedOperatorId), StringSerializer.INSTANCE)
                    .finish()
                    .build()) {
        // Complete the restore first (state initialization and open).
        harness.processAll();

        TestCheckpointResponder responder = harness.getCheckpointResponder();
        responder.setAcknowledgeLatch(new OneShotLatch());
        responder.setDeclinedLatch(new OneShotLatch());

        CheckpointBarrier barrier =
                new CheckpointBarrier(2, 2, CheckpointOptions.unaligned(CheckpointType.CHECKPOINT, getDefault()));

        // The first unaligned barrier makes the task take the snapshot and start the
        // asynchronous part. Pause briefly so that the asynchronous part begins executing
        // before the remaining barriers arrive.
        harness.processEvent(barrier, 0, 0);
        Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);

        // Deliver the barrier on the remaining two channels to finish the checkpoint.
        harness.processEvent(barrier, 0, 1);
        harness.processEvent(barrier, 0, 2);

        // Wait until the asynchronous part has finished, either normally or exceptionally.
        CommonTestUtils.waitUntilCondition(
                () -> responder.getAcknowledgeLatch().isTriggered()
                        || responder.getDeclinedLatch().isTriggered(),
                Deadline.fromNow(Duration.ofSeconds(10)));

        // Exactly checkpoint 2 must have been acknowledged ...
        assertEquals(
                Collections.singletonList(2L),
                responder.getAcknowledgeReports().stream()
                        .map(TestCheckpointResponder.AbstractReport::getCheckpointId)
                        .collect(Collectors.toList()));
        // ... and nothing must have been declined.
        assertEquals(
                Collections.emptyList(),
                responder.getDeclineReports().stream()
                        .map(TestCheckpointResponder.AbstractReport::getCheckpointId)
                        .collect(Collectors.toList()));
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) Deadline(org.apache.flink.api.common.time.Deadline) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ListState(org.apache.flink.api.common.state.ListState) Future(java.util.concurrent.Future) CheckpointStorageLocationReference.getDefault(org.apache.flink.runtime.state.CheckpointStorageLocationReference.getDefault) Duration(java.time.Duration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) Collectors(java.util.stream.Collectors) StopMode(org.apache.flink.runtime.io.network.api.StopMode) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) 
CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) STRING_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Nullable(javax.annotation.Nullable) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) 
OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Test(org.junit.Test)

Aggregations

OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)138 Test (org.junit.Test)118 JobID (org.apache.flink.api.common.JobID)41 CompletableFuture (java.util.concurrent.CompletableFuture)38 ExecutionException (java.util.concurrent.ExecutionException)27 Configuration (org.apache.flink.configuration.Configuration)26 IOException (java.io.IOException)24 Before (org.junit.Before)24 FlinkException (org.apache.flink.util.FlinkException)23 TestLogger (org.apache.flink.util.TestLogger)21 File (java.io.File)20 UUID (java.util.UUID)18 TimeoutException (java.util.concurrent.TimeoutException)18 TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)18 Time (org.apache.flink.api.common.time.Time)17 TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway)17 Rule (org.junit.Rule)17 Collections (java.util.Collections)16 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)16 RpcUtils (org.apache.flink.runtime.rpc.RpcUtils)16