Search in sources :

Example 31 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class AbstractStreamOperatorTestHarness method initializeState.

/**
 * Calls {@link org.apache.flink.streaming.api.operators.StreamOperator#initializeState()}.
 * Calls {@link
 * org.apache.flink.streaming.api.operators.SetupableStreamOperator#setup(StreamTask,
 * StreamConfig, Output)} if it was not called before.
 *
 * @param jmOperatorStateHandles the primary state (owned by JM)
 * @param tmOperatorStateHandles the (optional) local state (owned by TM) or null.
 * @throws Exception
 */
public void initializeState(OperatorSubtaskState jmOperatorStateHandles, OperatorSubtaskState tmOperatorStateHandles) throws Exception {
    checkState(!initializeCalled, "TestHarness has already been initialized. Have you " + "opened this harness before initializing it?");
    if (!setupCalled) {
        setup();
    }
    if (jmOperatorStateHandles != null) {
        TaskStateSnapshot jmTaskStateSnapshot = new TaskStateSnapshot();
        jmTaskStateSnapshot.putSubtaskStateByOperatorID(operator.getOperatorID(), jmOperatorStateHandles);
        taskStateManager.setReportedCheckpointId(0);
        taskStateManager.setJobManagerTaskStateSnapshotsByCheckpointId(Collections.singletonMap(0L, jmTaskStateSnapshot));
        if (tmOperatorStateHandles != null) {
            TaskStateSnapshot tmTaskStateSnapshot = new TaskStateSnapshot();
            tmTaskStateSnapshot.putSubtaskStateByOperatorID(operator.getOperatorID(), tmOperatorStateHandles);
            taskStateManager.setTaskManagerTaskStateSnapshotsByCheckpointId(Collections.singletonMap(0L, tmTaskStateSnapshot));
        }
    }
    operator.initializeState(mockTask.createStreamTaskStateInitializer());
    initializeCalled = true;
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot)

Example 32 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class SourceStreamTaskTest method testTriggeringCheckpointAfterSourceThreadFinished.

@Test
public void testTriggeringCheckpointAfterSourceThreadFinished() throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try (NettyShuffleEnvironment env = new NettyShuffleEnvironmentBuilder().setNumNetworkBuffers(partitionWriters.length * 2).build()) {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(env, ResultPartitionType.PIPELINED_BOUNDED, 1);
            partitionWriters[i].setup();
        }
        final CompletableFuture<Long> checkpointCompleted = new CompletableFuture<>();
        try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).modifyStreamConfig(config -> config.setCheckpointingEnabled(true)).setCheckpointResponder(new TestCheckpointResponder() {

            @Override
            public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
                super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
                checkpointCompleted.complete(checkpointId);
            }
        }).addAdditionalOutput(partitionWriters).setupOperatorChain(new StreamSource<>(new MockSource(0, 0, 1))).finishForSingletonOperatorChain(StringSerializer.INSTANCE).build()) {
            testHarness.processAll();
            CompletableFuture<Void> taskFinished = testHarness.getStreamTask().getCompletionFuture();
            do {
                testHarness.processAll();
            } while (!taskFinished.isDone());
            Future<Boolean> checkpointFuture = triggerCheckpoint(testHarness, 2);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkState(checkpointFuture instanceof CompletableFuture, "The trigger future should " + " be also CompletableFuture.");
            ((CompletableFuture<?>) checkpointFuture).thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            checkpointCompleted.whenComplete((id, error) -> testHarness.getStreamTask().notifyCheckpointCompleteAsync(2));
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            // EndOfUserRecordEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(3, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Random(java.util.Random) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) StreamTaskFinalCheckpointsTest.triggerCheckpoint(org.apache.flink.streaming.runtime.tasks.StreamTaskFinalCheckpointsTest.triggerCheckpoint) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) LifeCyclePhase(org.apache.flink.streaming.runtime.tasks.LifeCycleMonitor.LifeCyclePhase) Future(java.util.concurrent.Future) RichSourceFunction(org.apache.flink.streaming.api.functions.source.RichSourceFunction) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FromElementsFunction(org.apache.flink.streaming.api.functions.source.FromElementsFunction) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) Executors(java.util.concurrent.Executors) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) SimpleOperatorFactory(org.apache.flink.streaming.api.operators.SimpleOperatorFactory) Serializable(java.io.Serializable) StopMode(org.apache.flink.runtime.io.network.api.StopMode) List(java.util.List) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) RecordOrEventCollectingResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.RecordOrEventCollectingResultPartitionWriter) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) INT_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.INT_TYPE_INFO) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) STRING_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO) ArrayList(java.util.ArrayList) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestHarnessUtil(org.apache.flink.streaming.util.TestHarnessUtil) CheckedSupplier(org.apache.flink.util.function.CheckedSupplier) ExecutorService(java.util.concurrent.ExecutorService) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) TimeCharacteristic(org.apache.flink.streaming.api.TimeCharacteristic) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Semaphore(java.util.concurrent.Semaphore) Configuration(org.apache.flink.configuration.Configuration) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) ListCheckpointed(org.apache.flink.streaming.api.checkpoint.ListCheckpointed) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) Assert(org.junit.Assert) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) RecordOrEventCollectingResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.RecordOrEventCollectingResultPartitionWriter) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) StreamTaskFinalCheckpointsTest.triggerCheckpoint(org.apache.flink.streaming.runtime.tasks.StreamTaskFinalCheckpointsTest.triggerCheckpoint) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) AtomicLong(java.util.concurrent.atomic.AtomicLong) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 33 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class TaskStateManagerImplTest method testStateReportingAndRetrieving.

/**
 * Test reporting and retrieving prioritized local and remote state.
 */
@Test
public void testStateReportingAndRetrieving() {
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestCheckpointResponder testCheckpointResponder = new TestCheckpointResponder();
    TestTaskLocalStateStore testTaskLocalStateStore = new TestTaskLocalStateStore();
    InMemoryStateChangelogStorage changelogStorage = new InMemoryStateChangelogStorage();
    TaskStateManager taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, null, testTaskLocalStateStore, changelogStorage);
    // ---------------------------------------- test reporting
    // -----------------------------------------
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(74L, 11L);
    CheckpointMetrics checkpointMetrics = new CheckpointMetrics();
    TaskStateSnapshot jmTaskStateSnapshot = new TaskStateSnapshot();
    OperatorID operatorID_1 = new OperatorID(1L, 1L);
    OperatorID operatorID_2 = new OperatorID(2L, 2L);
    OperatorID operatorID_3 = new OperatorID(3L, 3L);
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_1).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_2).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_3).isRestored());
    KeyGroupRange keyGroupRange = new KeyGroupRange(0, 1);
    // Remote state of operator 1 has only managed keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    // Remote state of operator 1 has only raw keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_2 = OperatorSubtaskState.builder().setRawKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, jmOperatorSubtaskState_1);
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_2, jmOperatorSubtaskState_2);
    TaskStateSnapshot tmTaskStateSnapshot = new TaskStateSnapshot();
    // Only operator 1 has a local alternative for the managed keyed state.
    OperatorSubtaskState tmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    tmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, tmOperatorSubtaskState_1);
    taskStateManager.reportTaskStateSnapshots(checkpointMetaData, checkpointMetrics, jmTaskStateSnapshot, tmTaskStateSnapshot);
    TestCheckpointResponder.AcknowledgeReport acknowledgeReport = testCheckpointResponder.getAcknowledgeReports().get(0);
    // checks that the checkpoint responder and the local state store received state as
    // expected.
    Assert.assertEquals(checkpointMetaData.getCheckpointId(), acknowledgeReport.getCheckpointId());
    Assert.assertEquals(checkpointMetrics, acknowledgeReport.getCheckpointMetrics());
    Assert.assertEquals(executionAttemptID, acknowledgeReport.getExecutionAttemptID());
    Assert.assertEquals(jobID, acknowledgeReport.getJobID());
    Assert.assertEquals(jmTaskStateSnapshot, acknowledgeReport.getSubtaskState());
    Assert.assertEquals(tmTaskStateSnapshot, testTaskLocalStateStore.retrieveLocalState(checkpointMetaData.getCheckpointId()));
    // -------------------------------------- test prio retrieving
    // ---------------------------------------
    JobManagerTaskRestore taskRestore = new JobManagerTaskRestore(checkpointMetaData.getCheckpointId(), acknowledgeReport.getSubtaskState());
    taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, taskRestore, testTaskLocalStateStore, changelogStorage);
    // this has remote AND local managed keyed state.
    PrioritizedOperatorSubtaskState prioritized_1 = taskStateManager.prioritizedOperatorState(operatorID_1);
    // this has only remote raw keyed state.
    PrioritizedOperatorSubtaskState prioritized_2 = taskStateManager.prioritizedOperatorState(operatorID_2);
    // not restored.
    PrioritizedOperatorSubtaskState prioritized_3 = taskStateManager.prioritizedOperatorState(operatorID_3);
    Assert.assertTrue(prioritized_1.isRestored());
    Assert.assertTrue(prioritized_2.isRestored());
    Assert.assertTrue(prioritized_3.isRestored());
    Assert.assertTrue(taskStateManager.prioritizedOperatorState(new OperatorID()).isRestored());
    // checks for operator 1.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedManagedKeyedState_1 = prioritized_1.getPrioritizedManagedKeyedState().iterator();
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    StateObjectCollection<KeyedStateHandle> current = prioritizedManagedKeyedState_1.next();
    KeyedStateHandle keyedStateHandleExp = tmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    KeyedStateHandle keyedStateHandleAct = current.iterator().next();
    Assert.assertTrue(keyedStateHandleExp == keyedStateHandleAct);
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    current = prioritizedManagedKeyedState_1.next();
    keyedStateHandleExp = jmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertTrue(keyedStateHandleExp == keyedStateHandleAct);
    Assert.assertFalse(prioritizedManagedKeyedState_1.hasNext());
    // checks for operator 2.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedRawKeyedState_2 = prioritized_2.getPrioritizedRawKeyedState().iterator();
    Assert.assertTrue(prioritizedRawKeyedState_2.hasNext());
    current = prioritizedRawKeyedState_2.next();
    keyedStateHandleExp = jmOperatorSubtaskState_2.getRawKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertTrue(keyedStateHandleExp == keyedStateHandleAct);
    Assert.assertFalse(prioritizedRawKeyedState_2.hasNext());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PrioritizedOperatorSubtaskState(org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) PrioritizedOperatorSubtaskState(org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 34 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class TaskStateManagerImplTest method testStateRetrievingWithFinishedOperator.

@Test
public void testStateRetrievingWithFinishedOperator() {
    TaskStateSnapshot taskStateSnapshot = TaskStateSnapshot.FINISHED_ON_RESTORE;
    JobManagerTaskRestore jobManagerTaskRestore = new JobManagerTaskRestore(2, taskStateSnapshot);
    TaskStateManagerImpl stateManager = new TaskStateManagerImpl(new JobID(), new ExecutionAttemptID(), new TestTaskLocalStateStore(), null, jobManagerTaskRestore, new TestCheckpointResponder());
    Assert.assertTrue(stateManager.isTaskDeployedAsFinished());
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 35 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class TaskLocalStateStoreImplTest method retrievePersistedLocalStateFromDisc.

@Test
public void retrievePersistedLocalStateFromDisc() {
    final TaskStateSnapshot taskStateSnapshot = createTaskStateSnapshot();
    final long checkpointId = 0L;
    taskLocalStateStore.storeLocalState(checkpointId, taskStateSnapshot);
    final TaskLocalStateStoreImpl newTaskLocalStateStore = createTaskLocalStateStoreImpl(allocationBaseDirs, jobID, allocationID, jobVertexID, 0);
    final TaskStateSnapshot retrievedTaskStateSnapshot = newTaskLocalStateStore.retrieveLocalState(checkpointId);
    assertThat(retrievedTaskStateSnapshot).isEqualTo(taskStateSnapshot);
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Test(org.junit.Test)

Aggregations

TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)42 Test (org.junit.Test)28 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)19 JobID (org.apache.flink.api.common.JobID)17 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)16 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)13 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)13 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)13 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder)9 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)8 IOException (java.io.IOException)7 HashMap (java.util.HashMap)6 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)6 CompletableFuture (java.util.concurrent.CompletableFuture)5 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)5 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)5 InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4