Search in sources :

Example 6 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class LocalStateForwardingTest method testReportingFromTaskStateManagerToResponderAndTaskLocalStateStore.

/**
 * Checks that a single call to {@code reportTaskStateSnapshots} on a {@link TaskStateManagerImpl}
 * reaches both collaborators: the {@link org.apache.flink.runtime.taskmanager.CheckpointResponder}
 * (through {@code acknowledgeCheckpoint}) and the {@link TaskLocalStateStoreImpl} (through {@code
 * storeLocalState}).
 */
@Test
public void testReportingFromTaskStateManagerToResponderAndTaskLocalStateStore() throws Exception {
    // Identity of the task whose state is being reported.
    final JobID jobID = new JobID();
    final AllocationID allocationID = new AllocationID();
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final JobVertexID vertexId = new JobVertexID();
    final int subtaskIndex = 42;

    // The checkpoint being reported and the two snapshots that travel with it.
    final CheckpointMetaData metaData = new CheckpointMetaData(42L, 4711L);
    final CheckpointMetrics metrics = new CheckpointMetrics();
    final TaskStateSnapshot jobManagerState = new TaskStateSnapshot();
    final TaskStateSnapshot taskLocalState = new TaskStateSnapshot();

    // Each flag is flipped by the corresponding stub below once it has been invoked.
    final AtomicBoolean responderCalled = new AtomicBoolean(false);
    final AtomicBoolean localStoreCalled = new AtomicBoolean(false);

    // Responder stub: validates the forwarded arguments, then records the call.
    final TestCheckpointResponder responder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID ackJobId, ExecutionAttemptID ackAttemptId, long ackCheckpointId, CheckpointMetrics ackMetrics, TaskStateSnapshot ackState) {
            Assert.assertEquals(jobID, ackJobId);
            Assert.assertEquals(attemptId, ackAttemptId);
            Assert.assertEquals(metaData.getCheckpointId(), ackCheckpointId);
            Assert.assertEquals(metrics, ackMetrics);
            responderCalled.set(true);
        }
    };

    final Executor directExecutor = Executors.directExecutor();
    final LocalRecoveryDirectoryProviderImpl recoveryDirectories = new LocalRecoveryDirectoryProviderImpl(temporaryFolder.newFolder(), jobID, vertexId, subtaskIndex);
    final LocalRecoveryConfig recoveryConfig = new LocalRecoveryConfig(recoveryDirectories);

    // Local store stub: validates the snapshot it is handed, then records the call.
    final TaskLocalStateStore localStore = new TaskLocalStateStoreImpl(jobID, allocationID, vertexId, subtaskIndex, recoveryConfig, directExecutor) {

        @Override
        public void storeLocalState(@Nonnegative long storedCheckpointId, @Nullable TaskStateSnapshot storedState) {
            Assert.assertEquals(taskLocalState, storedState);
            localStoreCalled.set(true);
        }
    };

    final StateChangelogStorage<?> changelogStorage = new InMemoryStateChangelogStorage();
    final TaskStateManagerImpl stateManager = new TaskStateManagerImpl(jobID, attemptId, localStore, changelogStorage, null, responder);

    stateManager.reportTaskStateSnapshots(metaData, metrics, jobManagerState, taskLocalState);

    Assert.assertTrue("Reporting for JM state was not called.", responderCalled.get());
    Assert.assertTrue("Reporting for TM state was not called.", localStoreCalled.get());
}
Also used : TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskLocalStateStore(org.apache.flink.runtime.state.TaskLocalStateStore) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) LocalRecoveryDirectoryProviderImpl(org.apache.flink.runtime.state.LocalRecoveryDirectoryProviderImpl) LocalRecoveryConfig(org.apache.flink.runtime.state.LocalRecoveryConfig) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Executor(java.util.concurrent.Executor) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) Nonnegative(javax.annotation.Nonnegative) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 7 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class OneInputStreamTaskTest method testSnapshottingAndRestoring.

/**
 * Runs a {@link OneInputStreamTask} with a chain of testing operators, takes one checkpoint, and
 * then starts a second task from the reported snapshot, expecting one restore call per chained
 * operator.
 */
@Test
public void testSnapshottingAndRestoring() throws Exception {
    final Deadline deadline = Deadline.fromNow(Duration.ofMinutes(2));

    // ---- first task: run and take a checkpoint ----
    final OneInputStreamTaskTestHarness<String, String> originalHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    originalHarness.setupOutputForSingletonOperatorChain();
    IdentityKeySelector<String> identitySelector = new IdentityKeySelector<>();
    originalHarness.configureForKeyedStream(identitySelector, BasicTypeInfo.STRING_TYPE_INFO);

    long snapshotId = 1L;
    long snapshotTimestamp = 1L;
    int chainLength = 11;

    configureChainedTestingStreamOperator(originalHarness.getStreamConfig(), chainLength);
    TestTaskStateManager originalStateManager = originalHarness.taskStateManager;

    // start counting restore calls from zero
    TestingStreamOperator.numberRestoreCalls = 0;

    originalHarness.invoke();
    originalHarness.waitForTaskRunning();

    final OneInputStreamTask<String, String> runningTask = originalHarness.getTask();
    runningTask.triggerCheckpointAsync(new CheckpointMetaData(snapshotId, snapshotTimestamp), CheckpointOptions.forCheckpointWithDefaultLocation()).get();

    // the first task started without any state, so nothing may have been restored
    assertEquals(0, TestingStreamOperator.numberRestoreCalls);

    originalStateManager.getWaitForReportLatch().await();
    assertEquals(snapshotId, originalStateManager.getReportedCheckpointId());

    originalHarness.endInput();
    originalHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());

    // ---- second task: start from the snapshot reported by the first ----
    final OneInputStreamTaskTestHarness<String, String> restoredHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    restoredHarness.configureForKeyedStream(identitySelector, BasicTypeInfo.STRING_TYPE_INFO);
    restoredHarness.setTaskStateSnapshot(snapshotId, originalStateManager.getLastJobManagerTaskStateSnapshot());
    configureChainedTestingStreamOperator(restoredHarness.getStreamConfig(), chainLength);

    // the reported snapshot must hold one entry per chained operator
    TaskStateSnapshot reportedState = originalStateManager.getLastJobManagerTaskStateSnapshot();
    Assert.assertEquals(chainLength, reportedState.getSubtaskStateMappings().size());

    TestingStreamOperator.numberRestoreCalls = 0;

    // hand the reported state over to the new harness
    restoredHarness.taskStateManager.restoreLatestCheckpointState(originalStateManager.getJobManagerTaskStateSnapshotsByCheckpointId());

    restoredHarness.invoke();
    restoredHarness.endInput();
    restoredHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());

    // every operator in the chain should have seen exactly one restore call
    assertEquals(chainLength, TestingStreamOperator.numberRestoreCalls);

    // leave the static counters clean
    TestingStreamOperator.numberRestoreCalls = 0;
    TestingStreamOperator.numberSnapshotCalls = 0;
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Test(org.junit.Test)

Example 8 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SourceExternalCheckpointTriggerTest method testCheckpointsTriggeredBySource.

/**
 * Drives a source task whose source function is an {@code ExternalCheckpointsSource} configured
 * with 10 elements and a checkpoint every 3 elements, and verifies the order of what appears on
 * the task output: elements 1..10, with checkpoint barriers 1, 2 and 3 following elements 3, 6
 * and 9 respectively.
 *
 * <p>Two checkpoints (ids 32 and 34) are additionally triggered through {@code
 * triggerCheckpointAsync}. Both calls are asserted to complete with {@code true}, yet the only
 * barrier ids verified on the output are 1, 2 and 3.
 *
 * <p>NOTE(review): {@code ready} and {@code sync} are declared outside this snippet; presumably
 * they coordinate this thread with the source thread - confirm against the enclosing class.
 */
@Test
@SuppressWarnings("unchecked")
public void testCheckpointsTriggeredBySource() throws Exception {
    // set up the basic test harness
    final StreamTaskTestHarness<Long> testHarness = new StreamTaskTestHarness<>(SourceStreamTask::new, BasicTypeInfo.LONG_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    // presumably -1 disables latency tracking so that only elements and barriers
    // show up in the verified output - TODO confirm
    testHarness.getExecutionConfig().setLatencyTrackingInterval(-1);
    final long numElements = 10;
    final long checkpointEvery = 3;
    // set up the source function
    ExternalCheckpointsSource source = new ExternalCheckpointsSource(numElements, checkpointEvery);
    StreamConfig streamConfig = testHarness.getStreamConfig();
    StreamSource<Long, ?> sourceOperator = new StreamSource<>(source);
    streamConfig.setStreamOperator(sourceOperator);
    streamConfig.setOperatorID(new OperatorID());
    // this starts the source thread
    testHarness.invoke();
    final StreamTask<Long, ?> sourceTask = testHarness.getTask();
    // block until 'ready' is released (presumably by the source once it is running)
    ready.await();
    // now send an external trigger (checkpoint id 32) that should be ignored:
    // the next item verified on the output is element 1, not a barrier
    assertTrue(sourceTask.triggerCheckpointAsync(new CheckpointMetaData(32, 829), CheckpointOptions.forCheckpointWithDefaultLocation()).get());
    // step by step let the source thread emit elements
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 1L);
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 2L);
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 3L);
    // third element reached: expect the first barrier (id 1)
    verifyCheckpointBarrier(testHarness.getOutput(), 1L);
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 4L);
    // now send a regular trigger command (checkpoint id 34) that should be ignored:
    // the next item verified on the output is element 5, not a barrier
    assertTrue(sourceTask.triggerCheckpointAsync(new CheckpointMetaData(34, 900), CheckpointOptions.forCheckpointWithDefaultLocation()).get());
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 5L);
    sync.trigger();
    verifyNextElement(testHarness.getOutput(), 6L);
    // sixth element reached: expect the second barrier (id 2)
    verifyCheckpointBarrier(testHarness.getOutput(), 2L);
    // remaining elements 7..10; a barrier is expected whenever the element number is a
    // multiple of checkpointEvery (here only after element 9, with barrier id 3)
    for (long l = 7L, checkpoint = 3L; l <= numElements; l++) {
        sync.trigger();
        verifyNextElement(testHarness.getOutput(), l);
        if (l % checkpointEvery == 0) {
            verifyCheckpointBarrier(testHarness.getOutput(), checkpoint++);
        }
    }
    // done!
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Test(org.junit.Test)

Example 9 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SourceStreamTaskTest method testTriggeringStopWithSavepointWithDrain.

/**
 * Triggers a terminating savepoint (checkpoint id 2) on a source task built around an {@code
 * EmptySource} and checks that the trigger future completes with {@code true} and that the
 * checkpoint was acknowledged to the responder.
 */
@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    SourceFunction<String> emptySource = new EmptySource();

    // Completed by the responder below as soon as a checkpoint is acknowledged.
    CompletableFuture<Boolean> acknowledged = new CompletableFuture<>();
    CheckpointResponder ackTrackingResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            acknowledged.complete(null);
        }
    };

    try (StreamTaskMailboxTestHarness<String> taskHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO)
                    .setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE)
                    .setCheckpointResponder(ackTrackingResponder)
                    .setupOutputForSingletonOperatorChain(new StreamSource<>(emptySource))
                    .build()) {
        CompletableFuture<Boolean> savepointTriggered =
                taskHarness.streamTask.triggerCheckpointAsync(
                        new CheckpointMetaData(2, 2),
                        CheckpointOptions.alignedNoTimeout(
                                SavepointType.terminate(SavepointFormatType.CANONICAL),
                                CheckpointStorageLocationReference.getDefault()));

        // Once the acknowledgement arrives, tell the task that checkpoint 2 is complete.
        acknowledged.whenComplete((unused, failure) -> taskHarness.streamTask.notifyCheckpointCompleteAsync(2));

        // Drive the mailbox until the source thread has finished and the mailbox is suspended
        taskHarness.streamTask.runMailboxLoop();
        taskHarness.finishProcessing();

        assertTrue(savepointTriggered.isDone());
        assertTrue(savepointTriggered.get());
        assertTrue(acknowledged.isDone());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 10 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SourceTaskTerminationTest method stopWithSavepointStreamTaskTestHelper.

/**
 * Shared scenario for the stop-with-savepoint tests: emits a few elements from a source task,
 * takes a regular checkpoint (id 31), then triggers a synchronous savepoint (id 34) and checks
 * the events that reach the task output before waiting for the task to complete.
 *
 * @param shouldTerminate if {@code true} the savepoint is created with {@code
 *     SavepointType.terminate} and a {@code MAX_WATERMARK} followed by {@code
 *     EndOfData(DRAIN)} is expected; otherwise {@code SavepointType.suspend} is used and
 *     {@code EndOfData(NO_DRAIN)} is expected without the watermark
 */
private void stopWithSavepointStreamTaskTestHelper(final boolean shouldTerminate) throws Exception {
    final long savepointId = 34L;
    try (StreamTaskMailboxTestHarness<Long> harness = getSourceStreamTaskTestHarness()) {
        final StreamTask<Long, ?> sourceTask = harness.getStreamTask();
        harness.processAll();

        // let the source thread emit elements one step at a time
        emitAndVerifyWatermarkAndElement(harness, 1L);
        emitAndVerifyWatermarkAndElement(harness, 2L);

        // a regular checkpoint in between
        harness.processUntil(sourceTask.triggerCheckpointAsync(new CheckpointMetaData(31L, 900), CheckpointOptions.forCheckpointWithDefaultLocation())::isDone);
        verifyCheckpointBarrier(harness.getOutput(), 31L);

        emitAndVerifyWatermarkAndElement(harness, 3L);

        // the synchronous savepoint, terminating or suspending depending on the flag
        final CheckpointOptions savepointOptions = new CheckpointOptions(
                shouldTerminate
                        ? SavepointType.terminate(SavepointFormatType.CANONICAL)
                        : SavepointType.suspend(SavepointFormatType.CANONICAL),
                CheckpointStorageLocationReference.getDefault());
        harness.processUntil(sourceTask.triggerCheckpointAsync(new CheckpointMetaData(savepointId, 900), savepointOptions)::isDone);

        if (shouldTerminate) {
            // in TERMINATE mode the source task is expected to emit MAX_WM
            // ahead of the SYNC_SAVEPOINT barrier.
            verifyWatermark(harness.getOutput(), Watermark.MAX_WATERMARK);
        }

        final StopMode expectedStopMode = shouldTerminate ? StopMode.DRAIN : StopMode.NO_DRAIN;
        verifyEvent(harness.getOutput(), new EndOfData(expectedStopMode));
        verifyCheckpointBarrier(harness.getOutput(), savepointId);

        waitForSynchronousSavepointIdToBeSet(sourceTask);
        assertTrue(sourceTask.getSynchronousSavepointId().isPresent());

        harness.processUntil(sourceTask.notifyCheckpointCompleteAsync(savepointId)::isDone);
        harness.waitForTaskCompletion();
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData)

Aggregations

CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)47 Test (org.junit.Test)33 CheckpointMetricsBuilder (org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder)16 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)15 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)13 IOException (java.io.IOException)12 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)11 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)11 OperatorSnapshotFutures (org.apache.flink.streaming.api.operators.OperatorSnapshotFutures)11 JobID (org.apache.flink.api.common.JobID)10 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)10 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 ExecutionException (java.util.concurrent.ExecutionException)9 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)9 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)8 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)8 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)7 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)7 CompletableFuture (java.util.concurrent.CompletableFuture)6