Search in sources :

Example 41 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SubtaskCheckpointCoordinatorTest method testNotifyCheckpointAbortedBeforeAsyncPhase.

/**
 * Aborts a checkpoint before it is executed and then runs {@code checkpointState} for the same
 * checkpoint id, asserting that the operator is not checkpointed, nothing is reported to the
 * state manager, and the coordinator's aborted-checkpoint and async-runnable counts are zero.
 */
@Test
public void testNotifyCheckpointAbortedBeforeAsyncPhase() throws Exception {
    final long abortedCheckpointId = 42L;
    final TestTaskStateManager taskStateManager = new TestTaskStateManager();
    final MockEnvironment environment =
            MockEnvironment.builder().setTaskStateManager(taskStateManager).build();

    try (SubtaskCheckpointCoordinatorImpl coordinator =
            (SubtaskCheckpointCoordinatorImpl)
                    new MockSubtaskCheckpointCoordinatorBuilder()
                            .setEnvironment(environment)
                            .setUnalignedCheckpointEnabled(true)
                            .build()) {
        final CheckpointOperator operator = new CheckpointOperator(new OperatorSnapshotFutures());
        final OperatorChain<String, AbstractStreamOperator<String>> chain = operatorChain(operator);

        // The abort notification arrives before the checkpoint itself is executed.
        coordinator.notifyCheckpointAborted(abortedCheckpointId, chain, () -> true);
        assertEquals(1, coordinator.getAbortedCheckpointSize());

        // Start the channel state writer and run the checkpoint for the already-aborted id.
        coordinator
                .getChannelStateWriter()
                .start(abortedCheckpointId, CheckpointOptions.forCheckpointWithDefaultLocation());
        final CheckpointMetaData metaData =
                new CheckpointMetaData(abortedCheckpointId, System.currentTimeMillis());
        coordinator.checkpointState(
                metaData,
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                new CheckpointMetricsBuilder(),
                chain,
                false,
                () -> false);

        // Nothing was snapshotted or reported, and the abort bookkeeping is empty again.
        assertFalse(operator.isCheckpointed());
        assertEquals(-1, taskStateManager.getReportedCheckpointId());
        assertEquals(0, coordinator.getAbortedCheckpointSize());
        assertEquals(0, coordinator.getAsyncCheckpointRunnableSize());
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) Test(org.junit.Test)

Example 42 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testCheckpointDoneOnFinishedOperator.

/**
 * Finishes the operators of a single-operator task, then triggers checkpoint 2 on a barrier and
 * asserts that the task state manager receives a report for that checkpoint id.
 */
@Test
public void testCheckpointDoneOnFinishedOperator() throws Exception {
    final FinishingOperator operator = new FinishingOperator();
    final StreamTaskMailboxTestHarnessBuilder<Integer> harnessBuilder =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            OneInputStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    .addInput(BasicTypeInfo.INT_TYPE_INFO);
    // NOTE(review): the harness is not closed here, although the same type is used in a
    // try-with-resources elsewhere — confirm whether closing after finishOperators is safe.
    final StreamTaskMailboxTestHarness<Integer> testHarness =
            harnessBuilder.setupOutputForSingletonOperatorChain(operator).build();

    // keeps the mailbox from suspending
    testHarness.setAutoProcess(false);
    testHarness.processElement(new StreamRecord<>(1));

    // Finish the operators directly on the chain, then verify the operator observed it.
    testHarness.streamTask.operatorChain.finishOperators(
            testHarness.streamTask.getActionExecutor(), StopMode.DRAIN);
    assertTrue(FinishingOperator.finished);

    // Reset the report latch so the await below only observes the checkpoint triggered next.
    testHarness.getTaskStateManager().getWaitForReportLatch().reset();

    final CheckpointMetaData metaData = new CheckpointMetaData(2, 0);
    final CheckpointOptions options = CheckpointOptions.forCheckpointWithDefaultLocation();
    final CheckpointMetricsBuilder metrics =
            new CheckpointMetricsBuilder()
                    .setBytesProcessedDuringAlignment(0L)
                    .setAlignmentDurationNanos(0L);
    testHarness.streamTask.triggerCheckpointOnBarrier(metaData, options, metrics);

    testHarness.getTaskStateManager().getWaitForReportLatch().await();
    assertEquals(2, testHarness.getTaskStateManager().getReportedCheckpointId());
}
Also used : CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Test(org.junit.Test)

Example 43 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SourceStreamTaskTestBase method testMetrics.

/**
 * Triggers an asynchronous checkpoint, delays processing of the mailbox by a fixed sleep, and
 * then verifies the checkpoint-start-delay gauge, the busy-time gauge and the acknowledged
 * checkpoint metrics.
 *
 * @param taskFactory creates the stream task under test from an {@code Environment}
 * @param operatorFactory operator factory used for the singleton operator chain
 * @param busyTimeMatcher matcher applied to the value of the task busy-time gauge
 * @throws Exception if building or running the harness fails
 */
public void testMetrics(FunctionWithException<Environment, ? extends StreamTask<Integer, ?>, Exception> taskFactory, StreamOperatorFactory<?> operatorFactory, Matcher<Double> busyTimeMatcher) throws Exception {
    long sleepTime = 42;
    // Lower bound for the start delay: the sleep in milliseconds expressed in nanoseconds.
    final long minStartDelayNanos = sleepTime * 1_000_000;

    final Map<String, Metric> registeredMetrics = new ConcurrentHashMap<>();
    final TaskMetricGroup metricGroup = StreamTaskTestHarness.createTaskMetricGroup(registeredMetrics);
    StreamTaskMailboxTestHarnessBuilder<Integer> harnessBuilder =
            new StreamTaskMailboxTestHarnessBuilder<>(taskFactory, INT_TYPE_INFO);

    try (StreamTaskMailboxTestHarness<Integer> testHarness =
            harnessBuilder
                    .setupOutputForSingletonOperatorChain(operatorFactory)
                    .setTaskMetricGroup(metricGroup)
                    .build()) {
        final CheckpointMetaData metaData = new CheckpointMetaData(1L, System.currentTimeMillis());
        Future<Boolean> pendingTrigger =
                testHarness.streamTask.triggerCheckpointAsync(
                        metaData, CheckpointOptions.forCheckpointWithDefaultLocation());

        OneShotLatch ackLatch = new OneShotLatch();
        testHarness.getCheckpointResponder().setAcknowledgeLatch(ackLatch);

        // The trigger must still be pending; sleeping before draining the mailbox is what
        // produces the start delay asserted below.
        assertFalse(pendingTrigger.isDone());
        Thread.sleep(sleepTime);
        while (!pendingTrigger.isDone()) {
            testHarness.streamTask.runMailboxStep();
        }

        Gauge<Long> startDelayGauge =
                (Gauge<Long>) registeredMetrics.get(MetricNames.CHECKPOINT_START_DELAY_TIME);
        assertThat(startDelayGauge.getValue(), greaterThanOrEqualTo(minStartDelayNanos));

        Gauge<Double> busyGauge = (Gauge<Double>) registeredMetrics.get(MetricNames.TASK_BUSY_TIME);
        assertThat(busyGauge.getValue(), busyTimeMatcher);

        // Wait for the acknowledgement, then check the delay carried in the reported metrics.
        ackLatch.await();
        TestCheckpointResponder.AcknowledgeReport report =
                Iterables.getOnlyElement(testHarness.getCheckpointResponder().getAcknowledgeReports());
        assertThat(
                report.getCheckpointMetrics().getCheckpointStartDelayNanos(),
                greaterThanOrEqualTo(minStartDelayNanos));
    }
}
Also used : TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Gauge(org.apache.flink.metrics.Gauge) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Metric(org.apache.flink.metrics.Metric) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 44 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class CheckpointBarrierHandler method notifyCheckpoint.

/**
 * Builds the checkpoint meta data and metrics for the given barrier and forwards them to
 * {@code toNotifyOnCheckpoint}.
 *
 * <p>The latest recorded alignment metrics are attached only when the barrier's id equals
 * {@code startAlignmentCheckpointId}; otherwise all three metrics are set to zero.
 *
 * @param checkpointBarrier barrier supplying the checkpoint id, timestamp and options
 * @throws IOException if triggering the checkpoint fails
 */
protected void notifyCheckpoint(CheckpointBarrier checkpointBarrier) throws IOException {
    final long barrierId = checkpointBarrier.getId();
    final CheckpointMetaData metaData =
            new CheckpointMetaData(
                    barrierId, checkpointBarrier.getTimestamp(), System.currentTimeMillis());

    final CheckpointMetricsBuilder metricsBuilder;
    if (barrierId != startAlignmentCheckpointId) {
        // The recorded alignment figures belong to a different checkpoint: report zeros.
        metricsBuilder =
                new CheckpointMetricsBuilder()
                        .setAlignmentDurationNanos(0L)
                        .setBytesProcessedDuringAlignment(0L)
                        .setCheckpointStartDelayNanos(0);
    } else {
        metricsBuilder =
                new CheckpointMetricsBuilder()
                        .setAlignmentDurationNanos(latestAlignmentDurationNanos)
                        .setBytesProcessedDuringAlignment(latestBytesProcessedDuringAlignment)
                        .setCheckpointStartDelayNanos(latestCheckpointStartDelayNanos);
    }

    toNotifyOnCheckpoint.triggerCheckpointOnBarrier(
            metaData, checkpointBarrier.getCheckpointOptions(), metricsBuilder);
}
Also used : CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData)

Example 45 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class TaskStateManagerImplTest method testStateReportingAndRetrieving.

/**
 * Test reporting and retrieving prioritized local and remote state.
 *
 * <p>First reports a remote (job manager) and a local (task manager) snapshot and checks that the
 * checkpoint responder and the local state store received them. Then rebuilds the task state
 * manager with a restore built from the acknowledged snapshot and checks the order in which the
 * prioritized state alternatives are returned (local first, then remote).
 */
@Test
public void testStateReportingAndRetrieving() {
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestCheckpointResponder testCheckpointResponder = new TestCheckpointResponder();
    TestTaskLocalStateStore testTaskLocalStateStore = new TestTaskLocalStateStore();
    InMemoryStateChangelogStorage changelogStorage = new InMemoryStateChangelogStorage();
    // No restore is passed here, so none of the operators is expected to be restored yet.
    TaskStateManager taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, null, testTaskLocalStateStore, changelogStorage);
    // ---------------------------------------- test reporting
    // -----------------------------------------
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(74L, 11L);
    CheckpointMetrics checkpointMetrics = new CheckpointMetrics();
    TaskStateSnapshot jmTaskStateSnapshot = new TaskStateSnapshot();
    OperatorID operatorID_1 = new OperatorID(1L, 1L);
    OperatorID operatorID_2 = new OperatorID(2L, 2L);
    OperatorID operatorID_3 = new OperatorID(3L, 3L);
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_1).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_2).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_3).isRestored());
    KeyGroupRange keyGroupRange = new KeyGroupRange(0, 1);
    // Remote state of operator 1 has only managed keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    // Remote state of operator 2 has only raw keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_2 = OperatorSubtaskState.builder().setRawKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, jmOperatorSubtaskState_1);
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_2, jmOperatorSubtaskState_2);
    TaskStateSnapshot tmTaskStateSnapshot = new TaskStateSnapshot();
    // Only operator 1 has a local alternative for the managed keyed state.
    OperatorSubtaskState tmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    tmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, tmOperatorSubtaskState_1);
    taskStateManager.reportTaskStateSnapshots(checkpointMetaData, checkpointMetrics, jmTaskStateSnapshot, tmTaskStateSnapshot);
    TestCheckpointResponder.AcknowledgeReport acknowledgeReport = testCheckpointResponder.getAcknowledgeReports().get(0);
    // checks that the checkpoint responder and the local state store received state as
    // expected.
    Assert.assertEquals(checkpointMetaData.getCheckpointId(), acknowledgeReport.getCheckpointId());
    Assert.assertEquals(checkpointMetrics, acknowledgeReport.getCheckpointMetrics());
    Assert.assertEquals(executionAttemptID, acknowledgeReport.getExecutionAttemptID());
    Assert.assertEquals(jobID, acknowledgeReport.getJobID());
    Assert.assertEquals(jmTaskStateSnapshot, acknowledgeReport.getSubtaskState());
    Assert.assertEquals(tmTaskStateSnapshot, testTaskLocalStateStore.retrieveLocalState(checkpointMetaData.getCheckpointId()));
    // -------------------------------------- test prio retrieving
    // ---------------------------------------
    JobManagerTaskRestore taskRestore = new JobManagerTaskRestore(checkpointMetaData.getCheckpointId(), acknowledgeReport.getSubtaskState());
    taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, taskRestore, testTaskLocalStateStore, changelogStorage);
    // this has remote AND local managed keyed state.
    PrioritizedOperatorSubtaskState prioritized_1 = taskStateManager.prioritizedOperatorState(operatorID_1);
    // this has only remote raw keyed state.
    PrioritizedOperatorSubtaskState prioritized_2 = taskStateManager.prioritizedOperatorState(operatorID_2);
    // no state was reported for operator 3 in either snapshot.
    PrioritizedOperatorSubtaskState prioritized_3 = taskStateManager.prioritizedOperatorState(operatorID_3);
    // With a restore present, every operator is asserted as restored, including operator 3 and
    // an operator id that was never used above.
    Assert.assertTrue(prioritized_1.isRestored());
    Assert.assertTrue(prioritized_2.isRestored());
    Assert.assertTrue(prioritized_3.isRestored());
    Assert.assertTrue(taskStateManager.prioritizedOperatorState(new OperatorID()).isRestored());
    // checks for operator 1: the local (task manager) handle comes first ...
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedManagedKeyedState_1 = prioritized_1.getPrioritizedManagedKeyedState().iterator();
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    StateObjectCollection<KeyedStateHandle> current = prioritizedManagedKeyedState_1.next();
    KeyedStateHandle keyedStateHandleExp = tmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    KeyedStateHandle keyedStateHandleAct = current.iterator().next();
    // Identity, not equality: the very same handle instance must be returned.
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    // ... followed by the remote (job manager) handle, and nothing after that.
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    current = prioritizedManagedKeyedState_1.next();
    keyedStateHandleExp = jmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedManagedKeyedState_1.hasNext());
    // checks for operator 2: only the remote raw keyed state handle is available.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedRawKeyedState_2 = prioritized_2.getPrioritizedRawKeyedState().iterator();
    Assert.assertTrue(prioritizedRawKeyedState_2.hasNext());
    current = prioritizedRawKeyedState_2.next();
    keyedStateHandleExp = jmOperatorSubtaskState_2.getRawKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedRawKeyedState_2.hasNext());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PrioritizedOperatorSubtaskState(org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) PrioritizedOperatorSubtaskState(org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)47 Test (org.junit.Test)33 CheckpointMetricsBuilder (org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder)16 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)15 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)13 IOException (java.io.IOException)12 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)11 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)11 OperatorSnapshotFutures (org.apache.flink.streaming.api.operators.OperatorSnapshotFutures)11 JobID (org.apache.flink.api.common.JobID)10 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)10 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 ExecutionException (java.util.concurrent.ExecutionException)9 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)9 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)8 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)8 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)7 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)7 CompletableFuture (java.util.concurrent.CompletableFuture)6